1*4757b351SPierre Pronchery/* Do not modify. This file is auto-generated from vpsm4-armv8.pl. */ 2*4757b351SPierre Pronchery// Copyright 2020-2025 The OpenSSL Project Authors. All Rights Reserved. 3*4757b351SPierre Pronchery// 4*4757b351SPierre Pronchery// Licensed under the Apache License 2.0 (the "License"). You may not use 5*4757b351SPierre Pronchery// this file except in compliance with the License. You can obtain a copy 6*4757b351SPierre Pronchery// in the file LICENSE in the source distribution or at 7*4757b351SPierre Pronchery// https://www.openssl.org/source/license.html 8*4757b351SPierre Pronchery 9*4757b351SPierre Pronchery// 10*4757b351SPierre Pronchery// This module implements SM4 with ASIMD on aarch64 11*4757b351SPierre Pronchery// 12*4757b351SPierre Pronchery// Feb 2022 13*4757b351SPierre Pronchery// 14*4757b351SPierre Pronchery 15*4757b351SPierre Pronchery// $output is the last argument if it looks like a file (it has an extension) 16*4757b351SPierre Pronchery// $flavour is the first argument if it doesn't look like a file 17*4757b351SPierre Pronchery#include "arm_arch.h" 18*4757b351SPierre Pronchery.arch armv8-a 19*4757b351SPierre Pronchery.text 20*4757b351SPierre Pronchery 21*4757b351SPierre Pronchery.section .rodata 22*4757b351SPierre Pronchery.type _vpsm4_consts,%object 23*4757b351SPierre Pronchery.align 7 24*4757b351SPierre Pronchery_vpsm4_consts: 25*4757b351SPierre Pronchery.Lsbox: 26*4757b351SPierre Pronchery.byte 0xD6,0x90,0xE9,0xFE,0xCC,0xE1,0x3D,0xB7,0x16,0xB6,0x14,0xC2,0x28,0xFB,0x2C,0x05 27*4757b351SPierre Pronchery.byte 0x2B,0x67,0x9A,0x76,0x2A,0xBE,0x04,0xC3,0xAA,0x44,0x13,0x26,0x49,0x86,0x06,0x99 28*4757b351SPierre Pronchery.byte 0x9C,0x42,0x50,0xF4,0x91,0xEF,0x98,0x7A,0x33,0x54,0x0B,0x43,0xED,0xCF,0xAC,0x62 29*4757b351SPierre Pronchery.byte 0xE4,0xB3,0x1C,0xA9,0xC9,0x08,0xE8,0x95,0x80,0xDF,0x94,0xFA,0x75,0x8F,0x3F,0xA6 30*4757b351SPierre Pronchery.byte 0x47,0x07,0xA7,0xFC,0xF3,0x73,0x17,0xBA,0x83,0x59,0x3C,0x19,0xE6,0x85,0x4F,0xA8 
31*4757b351SPierre Pronchery.byte 0x68,0x6B,0x81,0xB2,0x71,0x64,0xDA,0x8B,0xF8,0xEB,0x0F,0x4B,0x70,0x56,0x9D,0x35 32*4757b351SPierre Pronchery.byte 0x1E,0x24,0x0E,0x5E,0x63,0x58,0xD1,0xA2,0x25,0x22,0x7C,0x3B,0x01,0x21,0x78,0x87 33*4757b351SPierre Pronchery.byte 0xD4,0x00,0x46,0x57,0x9F,0xD3,0x27,0x52,0x4C,0x36,0x02,0xE7,0xA0,0xC4,0xC8,0x9E 34*4757b351SPierre Pronchery.byte 0xEA,0xBF,0x8A,0xD2,0x40,0xC7,0x38,0xB5,0xA3,0xF7,0xF2,0xCE,0xF9,0x61,0x15,0xA1 35*4757b351SPierre Pronchery.byte 0xE0,0xAE,0x5D,0xA4,0x9B,0x34,0x1A,0x55,0xAD,0x93,0x32,0x30,0xF5,0x8C,0xB1,0xE3 36*4757b351SPierre Pronchery.byte 0x1D,0xF6,0xE2,0x2E,0x82,0x66,0xCA,0x60,0xC0,0x29,0x23,0xAB,0x0D,0x53,0x4E,0x6F 37*4757b351SPierre Pronchery.byte 0xD5,0xDB,0x37,0x45,0xDE,0xFD,0x8E,0x2F,0x03,0xFF,0x6A,0x72,0x6D,0x6C,0x5B,0x51 38*4757b351SPierre Pronchery.byte 0x8D,0x1B,0xAF,0x92,0xBB,0xDD,0xBC,0x7F,0x11,0xD9,0x5C,0x41,0x1F,0x10,0x5A,0xD8 39*4757b351SPierre Pronchery.byte 0x0A,0xC1,0x31,0x88,0xA5,0xCD,0x7B,0xBD,0x2D,0x74,0xD0,0x12,0xB8,0xE5,0xB4,0xB0 40*4757b351SPierre Pronchery.byte 0x89,0x69,0x97,0x4A,0x0C,0x96,0x77,0x7E,0x65,0xB9,0xF1,0x09,0xC5,0x6E,0xC6,0x84 41*4757b351SPierre Pronchery.byte 0x18,0xF0,0x7D,0xEC,0x3A,0xDC,0x4D,0x20,0x79,0xEE,0x5F,0x3E,0xD7,0xCB,0x39,0x48 42*4757b351SPierre Pronchery.Lck: 43*4757b351SPierre Pronchery.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 44*4757b351SPierre Pronchery.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 45*4757b351SPierre Pronchery.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 46*4757b351SPierre Pronchery.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 47*4757b351SPierre Pronchery.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 48*4757b351SPierre Pronchery.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 49*4757b351SPierre Pronchery.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 50*4757b351SPierre Pronchery.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 51*4757b351SPierre Pronchery.Lfk: 52*4757b351SPierre 
Pronchery.quad 0x56aa3350a3b1bac6,0xb27022dc677d9197 53*4757b351SPierre Pronchery.Lshuffles: 54*4757b351SPierre Pronchery.quad 0x0B0A090807060504,0x030201000F0E0D0C 55*4757b351SPierre Pronchery.Lxts_magic: 56*4757b351SPierre Pronchery.quad 0x0101010101010187,0x0101010101010101 57*4757b351SPierre Pronchery 58*4757b351SPierre Pronchery.size _vpsm4_consts,.-_vpsm4_consts 59*4757b351SPierre Pronchery 60*4757b351SPierre Pronchery.previous 61*4757b351SPierre Pronchery 62*4757b351SPierre Pronchery.type _vpsm4_set_key,%function 63*4757b351SPierre Pronchery.align 4 64*4757b351SPierre Pronchery_vpsm4_set_key: 65*4757b351SPierre Pronchery AARCH64_VALID_CALL_TARGET 66*4757b351SPierre Pronchery ld1 {v5.4s},[x0] 67*4757b351SPierre Pronchery adrp x10,.Lsbox 68*4757b351SPierre Pronchery add x10,x10,#:lo12:.Lsbox 69*4757b351SPierre Pronchery ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 70*4757b351SPierre Pronchery ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 71*4757b351SPierre Pronchery ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 72*4757b351SPierre Pronchery ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] 73*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 74*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 75*4757b351SPierre Pronchery#endif 76*4757b351SPierre Pronchery adrp x5,.Lshuffles 77*4757b351SPierre Pronchery add x5,x5,#:lo12:.Lshuffles 78*4757b351SPierre Pronchery ld1 {v7.2d},[x5] 79*4757b351SPierre Pronchery adrp x5,.Lfk 80*4757b351SPierre Pronchery add x5,x5,#:lo12:.Lfk 81*4757b351SPierre Pronchery ld1 {v6.2d},[x5] 82*4757b351SPierre Pronchery eor v5.16b,v5.16b,v6.16b 83*4757b351SPierre Pronchery mov x6,#32 84*4757b351SPierre Pronchery adrp x5,.Lck 85*4757b351SPierre Pronchery add x5,x5,#:lo12:.Lck 86*4757b351SPierre Pronchery movi v0.16b,#64 87*4757b351SPierre Pronchery cbnz w2,1f 88*4757b351SPierre Pronchery add x1,x1,124 89*4757b351SPierre Pronchery1: 90*4757b351SPierre Pronchery mov w7,v5.s[1] 91*4757b351SPierre Pronchery ldr w8,[x5],#4 92*4757b351SPierre Pronchery 
eor w8,w8,w7 93*4757b351SPierre Pronchery mov w7,v5.s[2] 94*4757b351SPierre Pronchery eor w8,w8,w7 95*4757b351SPierre Pronchery mov w7,v5.s[3] 96*4757b351SPierre Pronchery eor w8,w8,w7 97*4757b351SPierre Pronchery // sbox lookup 98*4757b351SPierre Pronchery mov v4.s[0],w8 99*4757b351SPierre Pronchery tbl v1.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v4.16b 100*4757b351SPierre Pronchery sub v4.16b,v4.16b,v0.16b 101*4757b351SPierre Pronchery tbx v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v4.16b 102*4757b351SPierre Pronchery sub v4.16b,v4.16b,v0.16b 103*4757b351SPierre Pronchery tbx v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v4.16b 104*4757b351SPierre Pronchery sub v4.16b,v4.16b,v0.16b 105*4757b351SPierre Pronchery tbx v1.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v4.16b 106*4757b351SPierre Pronchery mov w7,v1.s[0] 107*4757b351SPierre Pronchery eor w8,w7,w7,ror #19 108*4757b351SPierre Pronchery eor w8,w8,w7,ror #9 109*4757b351SPierre Pronchery mov w7,v5.s[0] 110*4757b351SPierre Pronchery eor w8,w8,w7 111*4757b351SPierre Pronchery mov v5.s[0],w8 112*4757b351SPierre Pronchery cbz w2,2f 113*4757b351SPierre Pronchery str w8,[x1],#4 114*4757b351SPierre Pronchery b 3f 115*4757b351SPierre Pronchery2: 116*4757b351SPierre Pronchery str w8,[x1],#-4 117*4757b351SPierre Pronchery3: 118*4757b351SPierre Pronchery tbl v5.16b,{v5.16b},v7.16b 119*4757b351SPierre Pronchery subs x6,x6,#1 120*4757b351SPierre Pronchery b.ne 1b 121*4757b351SPierre Pronchery ret 122*4757b351SPierre Pronchery.size _vpsm4_set_key,.-_vpsm4_set_key 123*4757b351SPierre Pronchery.type _vpsm4_enc_4blks,%function 124*4757b351SPierre Pronchery.align 4 125*4757b351SPierre Pronchery_vpsm4_enc_4blks: 126*4757b351SPierre Pronchery AARCH64_VALID_CALL_TARGET 127*4757b351SPierre Pronchery mov x10,x3 128*4757b351SPierre Pronchery mov w11,#8 129*4757b351SPierre Pronchery10: 130*4757b351SPierre Pronchery ldp w7,w8,[x10],8 131*4757b351SPierre Pronchery dup v12.4s,w7 132*4757b351SPierre Pronchery dup v13.4s,w8 133*4757b351SPierre Pronchery 
134*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 135*4757b351SPierre Pronchery eor v14.16b,v6.16b,v7.16b 136*4757b351SPierre Pronchery eor v12.16b,v5.16b,v12.16b 137*4757b351SPierre Pronchery eor v12.16b,v14.16b,v12.16b 138*4757b351SPierre Pronchery movi v0.16b,#64 139*4757b351SPierre Pronchery movi v1.16b,#128 140*4757b351SPierre Pronchery movi v2.16b,#192 141*4757b351SPierre Pronchery sub v0.16b,v12.16b,v0.16b 142*4757b351SPierre Pronchery sub v1.16b,v12.16b,v1.16b 143*4757b351SPierre Pronchery sub v2.16b,v12.16b,v2.16b 144*4757b351SPierre Pronchery tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b 145*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 146*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 147*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 148*4757b351SPierre Pronchery add v0.2d,v0.2d,v1.2d 149*4757b351SPierre Pronchery add v2.2d,v2.2d,v12.2d 150*4757b351SPierre Pronchery add v12.2d,v0.2d,v2.2d 151*4757b351SPierre Pronchery 152*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-2 153*4757b351SPierre Pronchery sli v0.4s,v12.4s,2 154*4757b351SPierre Pronchery ushr v2.4s,v12.4s,32-10 155*4757b351SPierre Pronchery eor v1.16b,v0.16b,v12.16b 156*4757b351SPierre Pronchery sli v2.4s,v12.4s,10 157*4757b351SPierre Pronchery eor v1.16b,v2.16b,v1.16b 158*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-18 159*4757b351SPierre Pronchery sli v0.4s,v12.4s,18 160*4757b351SPierre Pronchery ushr v2.4s,v12.4s,32-24 161*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 162*4757b351SPierre Pronchery sli v2.4s,v12.4s,24 163*4757b351SPierre Pronchery eor v12.16b,v2.16b,v1.16b 164*4757b351SPierre Pronchery eor v4.16b,v4.16b,v12.16b 165*4757b351SPierre Pronchery 166*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 167*4757b351SPierre Pronchery eor v14.16b,v14.16b,v4.16b 168*4757b351SPierre Pronchery eor v13.16b,v14.16b,v13.16b 169*4757b351SPierre Pronchery movi 
v0.16b,#64 170*4757b351SPierre Pronchery movi v1.16b,#128 171*4757b351SPierre Pronchery movi v2.16b,#192 172*4757b351SPierre Pronchery sub v0.16b,v13.16b,v0.16b 173*4757b351SPierre Pronchery sub v1.16b,v13.16b,v1.16b 174*4757b351SPierre Pronchery sub v2.16b,v13.16b,v2.16b 175*4757b351SPierre Pronchery tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b 176*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 177*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 178*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 179*4757b351SPierre Pronchery add v0.2d,v0.2d,v1.2d 180*4757b351SPierre Pronchery add v2.2d,v2.2d,v13.2d 181*4757b351SPierre Pronchery add v13.2d,v0.2d,v2.2d 182*4757b351SPierre Pronchery 183*4757b351SPierre Pronchery ushr v0.4s,v13.4s,32-2 184*4757b351SPierre Pronchery sli v0.4s,v13.4s,2 185*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-10 186*4757b351SPierre Pronchery eor v1.16b,v0.16b,v13.16b 187*4757b351SPierre Pronchery sli v2.4s,v13.4s,10 188*4757b351SPierre Pronchery eor v1.16b,v2.16b,v1.16b 189*4757b351SPierre Pronchery ushr v0.4s,v13.4s,32-18 190*4757b351SPierre Pronchery sli v0.4s,v13.4s,18 191*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-24 192*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 193*4757b351SPierre Pronchery sli v2.4s,v13.4s,24 194*4757b351SPierre Pronchery eor v13.16b,v2.16b,v1.16b 195*4757b351SPierre Pronchery ldp w7,w8,[x10],8 196*4757b351SPierre Pronchery eor v5.16b,v5.16b,v13.16b 197*4757b351SPierre Pronchery 198*4757b351SPierre Pronchery dup v12.4s,w7 199*4757b351SPierre Pronchery dup v13.4s,w8 200*4757b351SPierre Pronchery 201*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 202*4757b351SPierre Pronchery eor v14.16b,v4.16b,v5.16b 203*4757b351SPierre Pronchery eor v12.16b,v7.16b,v12.16b 204*4757b351SPierre Pronchery eor v12.16b,v14.16b,v12.16b 205*4757b351SPierre Pronchery movi v0.16b,#64 206*4757b351SPierre Pronchery movi 
v1.16b,#128 207*4757b351SPierre Pronchery movi v2.16b,#192 208*4757b351SPierre Pronchery sub v0.16b,v12.16b,v0.16b 209*4757b351SPierre Pronchery sub v1.16b,v12.16b,v1.16b 210*4757b351SPierre Pronchery sub v2.16b,v12.16b,v2.16b 211*4757b351SPierre Pronchery tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b 212*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 213*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 214*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 215*4757b351SPierre Pronchery add v0.2d,v0.2d,v1.2d 216*4757b351SPierre Pronchery add v2.2d,v2.2d,v12.2d 217*4757b351SPierre Pronchery add v12.2d,v0.2d,v2.2d 218*4757b351SPierre Pronchery 219*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-2 220*4757b351SPierre Pronchery sli v0.4s,v12.4s,2 221*4757b351SPierre Pronchery ushr v2.4s,v12.4s,32-10 222*4757b351SPierre Pronchery eor v1.16b,v0.16b,v12.16b 223*4757b351SPierre Pronchery sli v2.4s,v12.4s,10 224*4757b351SPierre Pronchery eor v1.16b,v2.16b,v1.16b 225*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-18 226*4757b351SPierre Pronchery sli v0.4s,v12.4s,18 227*4757b351SPierre Pronchery ushr v2.4s,v12.4s,32-24 228*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 229*4757b351SPierre Pronchery sli v2.4s,v12.4s,24 230*4757b351SPierre Pronchery eor v12.16b,v2.16b,v1.16b 231*4757b351SPierre Pronchery eor v6.16b,v6.16b,v12.16b 232*4757b351SPierre Pronchery 233*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 234*4757b351SPierre Pronchery eor v14.16b,v14.16b,v6.16b 235*4757b351SPierre Pronchery eor v13.16b,v14.16b,v13.16b 236*4757b351SPierre Pronchery movi v0.16b,#64 237*4757b351SPierre Pronchery movi v1.16b,#128 238*4757b351SPierre Pronchery movi v2.16b,#192 239*4757b351SPierre Pronchery sub v0.16b,v13.16b,v0.16b 240*4757b351SPierre Pronchery sub v1.16b,v13.16b,v1.16b 241*4757b351SPierre Pronchery sub v2.16b,v13.16b,v2.16b 242*4757b351SPierre Pronchery tbl 
v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b 243*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 244*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 245*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 246*4757b351SPierre Pronchery add v0.2d,v0.2d,v1.2d 247*4757b351SPierre Pronchery add v2.2d,v2.2d,v13.2d 248*4757b351SPierre Pronchery add v13.2d,v0.2d,v2.2d 249*4757b351SPierre Pronchery 250*4757b351SPierre Pronchery ushr v0.4s,v13.4s,32-2 251*4757b351SPierre Pronchery sli v0.4s,v13.4s,2 252*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-10 253*4757b351SPierre Pronchery eor v1.16b,v0.16b,v13.16b 254*4757b351SPierre Pronchery sli v2.4s,v13.4s,10 255*4757b351SPierre Pronchery eor v1.16b,v2.16b,v1.16b 256*4757b351SPierre Pronchery ushr v0.4s,v13.4s,32-18 257*4757b351SPierre Pronchery sli v0.4s,v13.4s,18 258*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-24 259*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 260*4757b351SPierre Pronchery sli v2.4s,v13.4s,24 261*4757b351SPierre Pronchery eor v13.16b,v2.16b,v1.16b 262*4757b351SPierre Pronchery eor v7.16b,v7.16b,v13.16b 263*4757b351SPierre Pronchery subs w11,w11,#1 264*4757b351SPierre Pronchery b.ne 10b 265*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 266*4757b351SPierre Pronchery rev32 v3.16b,v4.16b 267*4757b351SPierre Pronchery#else 268*4757b351SPierre Pronchery mov v3.16b,v4.16b 269*4757b351SPierre Pronchery#endif 270*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 271*4757b351SPierre Pronchery rev32 v2.16b,v5.16b 272*4757b351SPierre Pronchery#else 273*4757b351SPierre Pronchery mov v2.16b,v5.16b 274*4757b351SPierre Pronchery#endif 275*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 276*4757b351SPierre Pronchery rev32 v1.16b,v6.16b 277*4757b351SPierre Pronchery#else 278*4757b351SPierre Pronchery mov v1.16b,v6.16b 279*4757b351SPierre Pronchery#endif 280*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 281*4757b351SPierre 
Pronchery rev32 v0.16b,v7.16b 282*4757b351SPierre Pronchery#else 283*4757b351SPierre Pronchery mov v0.16b,v7.16b 284*4757b351SPierre Pronchery#endif 285*4757b351SPierre Pronchery ret 286*4757b351SPierre Pronchery.size _vpsm4_enc_4blks,.-_vpsm4_enc_4blks 287*4757b351SPierre Pronchery.type _vpsm4_enc_8blks,%function 288*4757b351SPierre Pronchery.align 4 289*4757b351SPierre Pronchery_vpsm4_enc_8blks: 290*4757b351SPierre Pronchery AARCH64_VALID_CALL_TARGET 291*4757b351SPierre Pronchery mov x10,x3 292*4757b351SPierre Pronchery mov w11,#8 293*4757b351SPierre Pronchery10: 294*4757b351SPierre Pronchery ldp w7,w8,[x10],8 295*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 296*4757b351SPierre Pronchery dup v12.4s,w7 297*4757b351SPierre Pronchery eor v14.16b,v6.16b,v7.16b 298*4757b351SPierre Pronchery eor v15.16b,v10.16b,v11.16b 299*4757b351SPierre Pronchery eor v0.16b,v5.16b,v12.16b 300*4757b351SPierre Pronchery eor v1.16b,v9.16b,v12.16b 301*4757b351SPierre Pronchery eor v12.16b,v14.16b,v0.16b 302*4757b351SPierre Pronchery eor v13.16b,v15.16b,v1.16b 303*4757b351SPierre Pronchery movi v3.16b,#64 304*4757b351SPierre Pronchery sub v0.16b,v12.16b,v3.16b 305*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 306*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 307*4757b351SPierre Pronchery tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b 308*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 309*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 310*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 311*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 312*4757b351SPierre Pronchery add v12.2d,v2.2d,v12.2d 313*4757b351SPierre Pronchery add v12.2d,v1.2d,v12.2d 314*4757b351SPierre Pronchery 315*4757b351SPierre Pronchery sub v0.16b,v13.16b,v3.16b 316*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 317*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 318*4757b351SPierre Pronchery tbl 
v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b 319*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 320*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 321*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 322*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 323*4757b351SPierre Pronchery add v13.2d,v2.2d,v13.2d 324*4757b351SPierre Pronchery add v13.2d,v1.2d,v13.2d 325*4757b351SPierre Pronchery 326*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-2 327*4757b351SPierre Pronchery sli v0.4s,v12.4s,2 328*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-2 329*4757b351SPierre Pronchery eor v1.16b,v0.16b,v12.16b 330*4757b351SPierre Pronchery sli v2.4s,v13.4s,2 331*4757b351SPierre Pronchery 332*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-10 333*4757b351SPierre Pronchery eor v3.16b,v2.16b,v13.16b 334*4757b351SPierre Pronchery sli v0.4s,v12.4s,10 335*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-10 336*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 337*4757b351SPierre Pronchery sli v2.4s,v13.4s,10 338*4757b351SPierre Pronchery 339*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-18 340*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 341*4757b351SPierre Pronchery sli v0.4s,v12.4s,18 342*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-18 343*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 344*4757b351SPierre Pronchery sli v2.4s,v13.4s,18 345*4757b351SPierre Pronchery 346*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-24 347*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 348*4757b351SPierre Pronchery sli v0.4s,v12.4s,24 349*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-24 350*4757b351SPierre Pronchery eor v12.16b,v0.16b,v1.16b 351*4757b351SPierre Pronchery sli v2.4s,v13.4s,24 352*4757b351SPierre Pronchery eor v13.16b,v2.16b,v3.16b 353*4757b351SPierre Pronchery eor v4.16b,v4.16b,v12.16b 354*4757b351SPierre Pronchery eor v8.16b,v8.16b,v13.16b 355*4757b351SPierre Pronchery 
356*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 357*4757b351SPierre Pronchery dup v13.4s,w8 358*4757b351SPierre Pronchery eor v14.16b,v14.16b,v4.16b 359*4757b351SPierre Pronchery eor v15.16b,v15.16b,v8.16b 360*4757b351SPierre Pronchery eor v12.16b,v14.16b,v13.16b 361*4757b351SPierre Pronchery eor v13.16b,v15.16b,v13.16b 362*4757b351SPierre Pronchery movi v3.16b,#64 363*4757b351SPierre Pronchery sub v0.16b,v12.16b,v3.16b 364*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 365*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 366*4757b351SPierre Pronchery tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b 367*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 368*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 369*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 370*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 371*4757b351SPierre Pronchery add v12.2d,v2.2d,v12.2d 372*4757b351SPierre Pronchery add v12.2d,v1.2d,v12.2d 373*4757b351SPierre Pronchery 374*4757b351SPierre Pronchery sub v0.16b,v13.16b,v3.16b 375*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 376*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 377*4757b351SPierre Pronchery tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b 378*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 379*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 380*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 381*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 382*4757b351SPierre Pronchery add v13.2d,v2.2d,v13.2d 383*4757b351SPierre Pronchery add v13.2d,v1.2d,v13.2d 384*4757b351SPierre Pronchery 385*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-2 386*4757b351SPierre Pronchery sli v0.4s,v12.4s,2 387*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-2 388*4757b351SPierre Pronchery eor v1.16b,v0.16b,v12.16b 389*4757b351SPierre Pronchery 
sli v2.4s,v13.4s,2 390*4757b351SPierre Pronchery 391*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-10 392*4757b351SPierre Pronchery eor v3.16b,v2.16b,v13.16b 393*4757b351SPierre Pronchery sli v0.4s,v12.4s,10 394*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-10 395*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 396*4757b351SPierre Pronchery sli v2.4s,v13.4s,10 397*4757b351SPierre Pronchery 398*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-18 399*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 400*4757b351SPierre Pronchery sli v0.4s,v12.4s,18 401*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-18 402*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 403*4757b351SPierre Pronchery sli v2.4s,v13.4s,18 404*4757b351SPierre Pronchery 405*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-24 406*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 407*4757b351SPierre Pronchery sli v0.4s,v12.4s,24 408*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-24 409*4757b351SPierre Pronchery eor v12.16b,v0.16b,v1.16b 410*4757b351SPierre Pronchery sli v2.4s,v13.4s,24 411*4757b351SPierre Pronchery eor v13.16b,v2.16b,v3.16b 412*4757b351SPierre Pronchery ldp w7,w8,[x10],8 413*4757b351SPierre Pronchery eor v5.16b,v5.16b,v12.16b 414*4757b351SPierre Pronchery eor v9.16b,v9.16b,v13.16b 415*4757b351SPierre Pronchery 416*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 417*4757b351SPierre Pronchery dup v12.4s,w7 418*4757b351SPierre Pronchery eor v14.16b,v4.16b,v5.16b 419*4757b351SPierre Pronchery eor v15.16b,v8.16b,v9.16b 420*4757b351SPierre Pronchery eor v0.16b,v7.16b,v12.16b 421*4757b351SPierre Pronchery eor v1.16b,v11.16b,v12.16b 422*4757b351SPierre Pronchery eor v12.16b,v14.16b,v0.16b 423*4757b351SPierre Pronchery eor v13.16b,v15.16b,v1.16b 424*4757b351SPierre Pronchery movi v3.16b,#64 425*4757b351SPierre Pronchery sub v0.16b,v12.16b,v3.16b 426*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 427*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 428*4757b351SPierre Pronchery 
tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b 429*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 430*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 431*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 432*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 433*4757b351SPierre Pronchery add v12.2d,v2.2d,v12.2d 434*4757b351SPierre Pronchery add v12.2d,v1.2d,v12.2d 435*4757b351SPierre Pronchery 436*4757b351SPierre Pronchery sub v0.16b,v13.16b,v3.16b 437*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 438*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 439*4757b351SPierre Pronchery tbl v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b 440*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 441*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 442*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 443*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 444*4757b351SPierre Pronchery add v13.2d,v2.2d,v13.2d 445*4757b351SPierre Pronchery add v13.2d,v1.2d,v13.2d 446*4757b351SPierre Pronchery 447*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-2 448*4757b351SPierre Pronchery sli v0.4s,v12.4s,2 449*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-2 450*4757b351SPierre Pronchery eor v1.16b,v0.16b,v12.16b 451*4757b351SPierre Pronchery sli v2.4s,v13.4s,2 452*4757b351SPierre Pronchery 453*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-10 454*4757b351SPierre Pronchery eor v3.16b,v2.16b,v13.16b 455*4757b351SPierre Pronchery sli v0.4s,v12.4s,10 456*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-10 457*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 458*4757b351SPierre Pronchery sli v2.4s,v13.4s,10 459*4757b351SPierre Pronchery 460*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-18 461*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 462*4757b351SPierre Pronchery sli v0.4s,v12.4s,18 463*4757b351SPierre 
Pronchery ushr v2.4s,v13.4s,32-18 464*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 465*4757b351SPierre Pronchery sli v2.4s,v13.4s,18 466*4757b351SPierre Pronchery 467*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-24 468*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 469*4757b351SPierre Pronchery sli v0.4s,v12.4s,24 470*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-24 471*4757b351SPierre Pronchery eor v12.16b,v0.16b,v1.16b 472*4757b351SPierre Pronchery sli v2.4s,v13.4s,24 473*4757b351SPierre Pronchery eor v13.16b,v2.16b,v3.16b 474*4757b351SPierre Pronchery eor v6.16b,v6.16b,v12.16b 475*4757b351SPierre Pronchery eor v10.16b,v10.16b,v13.16b 476*4757b351SPierre Pronchery 477*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 478*4757b351SPierre Pronchery dup v13.4s,w8 479*4757b351SPierre Pronchery eor v14.16b,v14.16b,v6.16b 480*4757b351SPierre Pronchery eor v15.16b,v15.16b,v10.16b 481*4757b351SPierre Pronchery eor v12.16b,v14.16b,v13.16b 482*4757b351SPierre Pronchery eor v13.16b,v15.16b,v13.16b 483*4757b351SPierre Pronchery movi v3.16b,#64 484*4757b351SPierre Pronchery sub v0.16b,v12.16b,v3.16b 485*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 486*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 487*4757b351SPierre Pronchery tbl v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b 488*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 489*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 490*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 491*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 492*4757b351SPierre Pronchery add v12.2d,v2.2d,v12.2d 493*4757b351SPierre Pronchery add v12.2d,v1.2d,v12.2d 494*4757b351SPierre Pronchery 495*4757b351SPierre Pronchery sub v0.16b,v13.16b,v3.16b 496*4757b351SPierre Pronchery sub v1.16b,v0.16b,v3.16b 497*4757b351SPierre Pronchery sub v2.16b,v1.16b,v3.16b 498*4757b351SPierre Pronchery tbl 
v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b 499*4757b351SPierre Pronchery tbl v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b 500*4757b351SPierre Pronchery tbl v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b 501*4757b351SPierre Pronchery tbl v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b 502*4757b351SPierre Pronchery add v1.2d,v0.2d,v1.2d 503*4757b351SPierre Pronchery add v13.2d,v2.2d,v13.2d 504*4757b351SPierre Pronchery add v13.2d,v1.2d,v13.2d 505*4757b351SPierre Pronchery 506*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-2 507*4757b351SPierre Pronchery sli v0.4s,v12.4s,2 508*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-2 509*4757b351SPierre Pronchery eor v1.16b,v0.16b,v12.16b 510*4757b351SPierre Pronchery sli v2.4s,v13.4s,2 511*4757b351SPierre Pronchery 512*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-10 513*4757b351SPierre Pronchery eor v3.16b,v2.16b,v13.16b 514*4757b351SPierre Pronchery sli v0.4s,v12.4s,10 515*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-10 516*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 517*4757b351SPierre Pronchery sli v2.4s,v13.4s,10 518*4757b351SPierre Pronchery 519*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-18 520*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 521*4757b351SPierre Pronchery sli v0.4s,v12.4s,18 522*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-18 523*4757b351SPierre Pronchery eor v1.16b,v0.16b,v1.16b 524*4757b351SPierre Pronchery sli v2.4s,v13.4s,18 525*4757b351SPierre Pronchery 526*4757b351SPierre Pronchery ushr v0.4s,v12.4s,32-24 527*4757b351SPierre Pronchery eor v3.16b,v2.16b,v3.16b 528*4757b351SPierre Pronchery sli v0.4s,v12.4s,24 529*4757b351SPierre Pronchery ushr v2.4s,v13.4s,32-24 530*4757b351SPierre Pronchery eor v12.16b,v0.16b,v1.16b 531*4757b351SPierre Pronchery sli v2.4s,v13.4s,24 532*4757b351SPierre Pronchery eor v13.16b,v2.16b,v3.16b 533*4757b351SPierre Pronchery eor v7.16b,v7.16b,v12.16b 534*4757b351SPierre Pronchery eor v11.16b,v11.16b,v13.16b 535*4757b351SPierre Pronchery subs 
w11,w11,#1 536*4757b351SPierre Pronchery b.ne 10b 537*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 538*4757b351SPierre Pronchery rev32 v3.16b,v4.16b 539*4757b351SPierre Pronchery#else 540*4757b351SPierre Pronchery mov v3.16b,v4.16b 541*4757b351SPierre Pronchery#endif 542*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 543*4757b351SPierre Pronchery rev32 v2.16b,v5.16b 544*4757b351SPierre Pronchery#else 545*4757b351SPierre Pronchery mov v2.16b,v5.16b 546*4757b351SPierre Pronchery#endif 547*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 548*4757b351SPierre Pronchery rev32 v1.16b,v6.16b 549*4757b351SPierre Pronchery#else 550*4757b351SPierre Pronchery mov v1.16b,v6.16b 551*4757b351SPierre Pronchery#endif 552*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 553*4757b351SPierre Pronchery rev32 v0.16b,v7.16b 554*4757b351SPierre Pronchery#else 555*4757b351SPierre Pronchery mov v0.16b,v7.16b 556*4757b351SPierre Pronchery#endif 557*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 558*4757b351SPierre Pronchery rev32 v7.16b,v8.16b 559*4757b351SPierre Pronchery#else 560*4757b351SPierre Pronchery mov v7.16b,v8.16b 561*4757b351SPierre Pronchery#endif 562*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 563*4757b351SPierre Pronchery rev32 v6.16b,v9.16b 564*4757b351SPierre Pronchery#else 565*4757b351SPierre Pronchery mov v6.16b,v9.16b 566*4757b351SPierre Pronchery#endif 567*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 568*4757b351SPierre Pronchery rev32 v5.16b,v10.16b 569*4757b351SPierre Pronchery#else 570*4757b351SPierre Pronchery mov v5.16b,v10.16b 571*4757b351SPierre Pronchery#endif 572*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 573*4757b351SPierre Pronchery rev32 v4.16b,v11.16b 574*4757b351SPierre Pronchery#else 575*4757b351SPierre Pronchery mov v4.16b,v11.16b 576*4757b351SPierre Pronchery#endif 577*4757b351SPierre Pronchery ret 578*4757b351SPierre Pronchery.size _vpsm4_enc_8blks,.-_vpsm4_enc_8blks 579*4757b351SPierre Pronchery.globl vpsm4_set_encrypt_key 580*4757b351SPierre 
.type	vpsm4_set_encrypt_key,%function
.align	5
// vpsm4_set_encrypt_key
//
// Thin wrapper around _vpsm4_set_key: forces w2 = 1 and forwards x0/x1
// exactly as the caller supplied them.
// NOTE(review): x0/x1 are presumably the user key and the round-key buffer,
// and w2 = 1 presumably selects the encryption ordering of the schedule;
// _vpsm4_set_key is not visible here - confirm against it.
//
// Stack: 16 bytes (x29/x30 pair). x29 is saved and restored but not
// re-pointed at the new frame.
vpsm4_set_encrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!	// keep LR alive across the bl below
	mov	w2,1
	bl	_vpsm4_set_key
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	vpsm4_set_encrypt_key,.-vpsm4_set_encrypt_key
.globl	vpsm4_set_decrypt_key
.type	vpsm4_set_decrypt_key,%function
.align	5
// vpsm4_set_decrypt_key
//
// Same wrapper as vpsm4_set_encrypt_key, except that w2 = 0 is passed to
// _vpsm4_set_key.
// NOTE(review): w2 = 0 presumably selects the decryption ordering of the
// round keys - confirm against _vpsm4_set_key.
vpsm4_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!	// keep LR alive across the bl below
	mov	w2,0
	bl	_vpsm4_set_key
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	vpsm4_set_decrypt_key,.-vpsm4_set_decrypt_key
.globl	vpsm4_encrypt
.type	vpsm4_encrypt,%function
.align	5
// vpsm4_encrypt
//
// Processes one 16-byte block with 32 rounds driven by a round-key array.
//
// In:    x0 = source block (16 bytes are read)
//        x1 = destination block (16 bytes are written)
//        x2 = round keys (32 x 32-bit words are read, in order)
// Out:   nothing in registers
// Clobb: x3, w6-w9, x10, w11-w15, v0-v4, v16-v31, flags
// Stack: none (leaf function)
//
// Register roles inside the loop:
//        w12..w15 = state words B0..B3
//        x10      = cursor into the round keys, w7/w8 = current key pair
//        w11      = loop counter (8 iterations, 4 rounds each)
//        v16-v31  = the 256-byte .Lsbox table, 64 bytes per 4-register group
//
// S-box lookup: a 4-register TBL yields 0 for any index byte >= 64, so the
// index word is looked up four times - as is, and minus 64, 128 and 192 -
// each time against a different quarter of the table. For every byte exactly
// one of the four lookups is in range, hence the four result words can be
// merged with plain adds without carries crossing byte boundaries.
//
// Linear step: "ror #32-n" is a rotate-left by n, so each round computes
//        T ^ rol(T,2) ^ rol(T,10) ^ rol(T,18) ^ rol(T,24)
// and XORs it into the state word being updated.
vpsm4_encrypt:
	AARCH64_VALID_CALL_TARGET
	ld1	{v4.4s},[x0]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
#ifndef __AARCH64EB__
	// little-endian build: byte-swap each word so it is handled big-endian
	rev32	v4.16b,v4.16b
#endif
	mov	x3,x2
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6		// only lane 0 is meaningful from here on

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8		// fetch the key pair for the next two rounds
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	// the output block is the state in reverse word order: B3,B2,B1,B0
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	st1	{v4.4s},[x1]
	ret
.size	vpsm4_encrypt,.-vpsm4_encrypt
.globl	vpsm4_decrypt
.type	vpsm4_decrypt,%function
.align	5
// vpsm4_decrypt
//
// Instruction-for-instruction the same routine as vpsm4_encrypt: one 16-byte
// block from x0 is run through 32 rounds using the round keys at x2, in the
// order they are stored, and written to x1.
// NOTE(review): decryption therefore depends entirely on the order of the
// round keys handed in - presumably the schedule produced through
// vpsm4_set_decrypt_key; confirm against _vpsm4_set_key.
//
// In:    x0 = source block, x1 = destination block, x2 = round keys
// Clobb: x3, w6-w9, x10, w11-w15, v0-v4, v16-v31, flags
// Stack: none (leaf function)
vpsm4_decrypt:
	AARCH64_VALID_CALL_TARGET
	ld1	{v4.4s},[x0]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	mov	x3,x2
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	// the output block is the state in reverse word order: B3,B2,B1,B0
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	st1	{v4.4s},[x1]
	ret
.size	vpsm4_decrypt,.-vpsm4_decrypt
.globl	vpsm4_ecb_encrypt
.type	vpsm4_ecb_encrypt,%function
.align	5
// vpsm4_ecb_encrypt
//
// Processes a run of independent 16-byte blocks: 8 at a time through
// _vpsm4_enc_8blks, then 4 at a time through _vpsm4_enc_4blks, then a tail
// of 1, 2 or 3 blocks.
//
// In:    x0 = source (advanced as blocks are consumed)
//        x1 = destination (advanced as blocks are produced)
//        x2 = length in bytes; shifted right by 4, so a trailing partial
//             block is ignored
//        x3 = round keys (read directly by the single-block path)
// Stack: 80 bytes holding d8-d15 and x29/x30
//
// NOTE(review): x3 is presumably also what _vpsm4_enc_4blks and
// _vpsm4_enc_8blks read their round keys from - those helpers are not
// visible here.
// NOTE(review): after the 64-bit shift the block count is only ever examined
// through w2 with signed conditions (b.lt / b.gt), so a count that does not
// fit in a positive 32-bit signed value would be mis-handled - confirm that
// callers never pass lengths that large.
//
// Data layout for the multi-block paths: ld4 de-interleaves the input, so
// v4 holds word 0 of each block, v5 word 1, v6 word 2 and v7 word 3 (v8-v11
// likewise for the second group of four). Results are stored from v0-v3
// (and v4-v7 for the second group) with st4, which re-interleaves them.
vpsm4_ecb_encrypt:
	AARCH64_SIGN_LINK_REGISTER
	// convert length into blocks
	lsr	x2,x2,4
	stp	d8,d9,[sp,#-80]!
	stp	d10,d11,[sp,#16]
	stp	d12,d13,[sp,#32]
	stp	d14,d15,[sp,#48]
	stp	x29,x30,[sp,#64]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
.Lecb_8_blocks_process:
	cmp	w2,#8
	b.lt	.Lecb_4_blocks_process
	ld4	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
	ld4	{v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
#ifndef __AARCH64EB__
	rev32	v8.16b,v8.16b
#endif
#ifndef __AARCH64EB__
	rev32	v9.16b,v9.16b
#endif
#ifndef __AARCH64EB__
	rev32	v10.16b,v10.16b
#endif
#ifndef __AARCH64EB__
	rev32	v11.16b,v11.16b
#endif
	bl	_vpsm4_enc_8blks
	st4	{v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64
	st4	{v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64
	subs	w2,w2,#8
	b.gt	.Lecb_8_blocks_process	// blocks remain: re-test at the loop head
	b	100f
.Lecb_4_blocks_process:
	cmp	w2,#4
	b.lt	1f
	ld4	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	bl	_vpsm4_enc_4blks
	st4	{v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64
	sub	w2,w2,#4
1:
	// process last block
	cmp	w2,#1
	b.lt	100f			// nothing left
	b.gt	1f			// two or three blocks left
	ld1	{v4.4s},[x0]
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	// exactly one block: same scalar round sequence as vpsm4_encrypt
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	st1	{v4.4s},[x1]
	b	100f
1:	// process last 2 blocks
	// lanes 0 and 1 are loaded for both tail sizes; the flags from the
	// cmp below pick between the 2-block and 3-block continuations
	ld4	{v4.s,v5.s,v6.s,v7.s}[0],[x0],#16
	ld4	{v4.s,v5.s,v6.s,v7.s}[1],[x0],#16
	cmp	w2,#2
	b.gt	1f
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	bl	_vpsm4_enc_4blks
	// only the lanes that carried real input are written back
	st4	{v0.s,v1.s,v2.s,v3.s}[0],[x1],#16
	st4	{v0.s,v1.s,v2.s,v3.s}[1],[x1]
	b	100f
1:	// process last 3 blocks
	ld4	{v4.s,v5.s,v6.s,v7.s}[2],[x0],#16
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	bl	_vpsm4_enc_4blks
	st4	{v0.s,v1.s,v2.s,v3.s}[0],[x1],#16
	st4	{v0.s,v1.s,v2.s,v3.s}[1],[x1],#16
	st4	{v0.s,v1.s,v2.s,v3.s}[2],[x1]
100:
	ldp	d10,d11,[sp,#16]
	ldp	d12,d13,[sp,#32]
	ldp	d14,d15,[sp,#48]
	ldp	x29,x30,[sp,#64]
	ldp	d8,d9,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	vpsm4_ecb_encrypt,.-vpsm4_ecb_encrypt
.globl	vpsm4_cbc_encrypt
.type	vpsm4_cbc_encrypt,%function
.align	5
vpsm4_cbc_encrypt:
	AARCH64_VALID_CALL_TARGET
	lsr	x2,x2,4
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
1208*4757b351SPierre Pronchery ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 1209*4757b351SPierre Pronchery ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] 1210*4757b351SPierre Pronchery cbz w5,.Ldec 1211*4757b351SPierre Pronchery ld1 {v3.4s},[x4] 1212*4757b351SPierre Pronchery.Lcbc_4_blocks_enc: 1213*4757b351SPierre Pronchery cmp w2,#4 1214*4757b351SPierre Pronchery b.lt 1f 1215*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 1216*4757b351SPierre Pronchery eor v4.16b,v4.16b,v3.16b 1217*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1218*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 1219*4757b351SPierre Pronchery#endif 1220*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1221*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 1222*4757b351SPierre Pronchery#endif 1223*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1224*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 1225*4757b351SPierre Pronchery#endif 1226*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1227*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 1228*4757b351SPierre Pronchery#endif 1229*4757b351SPierre Pronchery mov x10,x3 1230*4757b351SPierre Pronchery mov w11,#8 1231*4757b351SPierre Pronchery mov w12,v4.s[0] 1232*4757b351SPierre Pronchery mov w13,v4.s[1] 1233*4757b351SPierre Pronchery mov w14,v4.s[2] 1234*4757b351SPierre Pronchery mov w15,v4.s[3] 1235*4757b351SPierre Pronchery10: 1236*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1237*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 1238*4757b351SPierre Pronchery eor w6,w14,w15 1239*4757b351SPierre Pronchery eor w9,w7,w13 1240*4757b351SPierre Pronchery eor w6,w6,w9 1241*4757b351SPierre Pronchery movi v1.16b,#64 1242*4757b351SPierre Pronchery movi v2.16b,#128 1243*4757b351SPierre Pronchery movi v3.16b,#192 1244*4757b351SPierre Pronchery mov v0.s[0],w6 1245*4757b351SPierre Pronchery 1246*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1247*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1248*4757b351SPierre Pronchery sub 
v3.16b,v0.16b,v3.16b 1249*4757b351SPierre Pronchery 1250*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1251*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1252*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1253*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1254*4757b351SPierre Pronchery 1255*4757b351SPierre Pronchery mov w6,v0.s[0] 1256*4757b351SPierre Pronchery mov w7,v1.s[0] 1257*4757b351SPierre Pronchery mov w9,v2.s[0] 1258*4757b351SPierre Pronchery add w7,w6,w7 1259*4757b351SPierre Pronchery mov w6,v3.s[0] 1260*4757b351SPierre Pronchery add w7,w7,w9 1261*4757b351SPierre Pronchery add w7,w7,w6 1262*4757b351SPierre Pronchery 1263*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1264*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1265*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1266*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1267*4757b351SPierre Pronchery eor w12,w12,w6 1268*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 1269*4757b351SPierre Pronchery eor w6,w14,w15 1270*4757b351SPierre Pronchery eor w9,w12,w8 1271*4757b351SPierre Pronchery eor w6,w6,w9 1272*4757b351SPierre Pronchery movi v1.16b,#64 1273*4757b351SPierre Pronchery movi v2.16b,#128 1274*4757b351SPierre Pronchery movi v3.16b,#192 1275*4757b351SPierre Pronchery mov v0.s[0],w6 1276*4757b351SPierre Pronchery 1277*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1278*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1279*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1280*4757b351SPierre Pronchery 1281*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1282*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1283*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1284*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1285*4757b351SPierre 
Pronchery 1286*4757b351SPierre Pronchery mov w6,v0.s[0] 1287*4757b351SPierre Pronchery mov w7,v1.s[0] 1288*4757b351SPierre Pronchery mov w9,v2.s[0] 1289*4757b351SPierre Pronchery add w7,w6,w7 1290*4757b351SPierre Pronchery mov w6,v3.s[0] 1291*4757b351SPierre Pronchery add w7,w7,w9 1292*4757b351SPierre Pronchery add w7,w7,w6 1293*4757b351SPierre Pronchery 1294*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1295*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1296*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1297*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1298*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1299*4757b351SPierre Pronchery eor w13,w13,w6 1300*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 1301*4757b351SPierre Pronchery eor w6,w12,w13 1302*4757b351SPierre Pronchery eor w9,w7,w15 1303*4757b351SPierre Pronchery eor w6,w6,w9 1304*4757b351SPierre Pronchery movi v1.16b,#64 1305*4757b351SPierre Pronchery movi v2.16b,#128 1306*4757b351SPierre Pronchery movi v3.16b,#192 1307*4757b351SPierre Pronchery mov v0.s[0],w6 1308*4757b351SPierre Pronchery 1309*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1310*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1311*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1312*4757b351SPierre Pronchery 1313*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1314*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1315*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1316*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1317*4757b351SPierre Pronchery 1318*4757b351SPierre Pronchery mov w6,v0.s[0] 1319*4757b351SPierre Pronchery mov w7,v1.s[0] 1320*4757b351SPierre Pronchery mov w9,v2.s[0] 1321*4757b351SPierre Pronchery add w7,w6,w7 1322*4757b351SPierre Pronchery mov w6,v3.s[0] 1323*4757b351SPierre Pronchery add w7,w7,w9 1324*4757b351SPierre Pronchery add w7,w7,w6 1325*4757b351SPierre Pronchery 
1326*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1327*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1328*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1329*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1330*4757b351SPierre Pronchery eor w14,w14,w6 1331*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 1332*4757b351SPierre Pronchery eor w6,w12,w13 1333*4757b351SPierre Pronchery eor w9,w14,w8 1334*4757b351SPierre Pronchery eor w6,w6,w9 1335*4757b351SPierre Pronchery movi v1.16b,#64 1336*4757b351SPierre Pronchery movi v2.16b,#128 1337*4757b351SPierre Pronchery movi v3.16b,#192 1338*4757b351SPierre Pronchery mov v0.s[0],w6 1339*4757b351SPierre Pronchery 1340*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1341*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1342*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1343*4757b351SPierre Pronchery 1344*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1345*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1346*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1347*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1348*4757b351SPierre Pronchery 1349*4757b351SPierre Pronchery mov w6,v0.s[0] 1350*4757b351SPierre Pronchery mov w7,v1.s[0] 1351*4757b351SPierre Pronchery mov w9,v2.s[0] 1352*4757b351SPierre Pronchery add w7,w6,w7 1353*4757b351SPierre Pronchery mov w6,v3.s[0] 1354*4757b351SPierre Pronchery add w7,w7,w9 1355*4757b351SPierre Pronchery add w7,w7,w6 1356*4757b351SPierre Pronchery 1357*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1358*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1359*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1360*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1361*4757b351SPierre Pronchery eor w15,w15,w6 1362*4757b351SPierre Pronchery subs w11,w11,#1 1363*4757b351SPierre Pronchery b.ne 10b 1364*4757b351SPierre Pronchery mov v4.s[0],w15 
1365*4757b351SPierre Pronchery mov v4.s[1],w14 1366*4757b351SPierre Pronchery mov v4.s[2],w13 1367*4757b351SPierre Pronchery mov v4.s[3],w12 1368*4757b351SPierre Pronchery eor v5.16b,v5.16b,v4.16b 1369*4757b351SPierre Pronchery mov x10,x3 1370*4757b351SPierre Pronchery mov w11,#8 1371*4757b351SPierre Pronchery mov w12,v5.s[0] 1372*4757b351SPierre Pronchery mov w13,v5.s[1] 1373*4757b351SPierre Pronchery mov w14,v5.s[2] 1374*4757b351SPierre Pronchery mov w15,v5.s[3] 1375*4757b351SPierre Pronchery10: 1376*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1377*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 1378*4757b351SPierre Pronchery eor w6,w14,w15 1379*4757b351SPierre Pronchery eor w9,w7,w13 1380*4757b351SPierre Pronchery eor w6,w6,w9 1381*4757b351SPierre Pronchery movi v1.16b,#64 1382*4757b351SPierre Pronchery movi v2.16b,#128 1383*4757b351SPierre Pronchery movi v3.16b,#192 1384*4757b351SPierre Pronchery mov v0.s[0],w6 1385*4757b351SPierre Pronchery 1386*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1387*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1388*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1389*4757b351SPierre Pronchery 1390*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1391*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1392*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1393*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1394*4757b351SPierre Pronchery 1395*4757b351SPierre Pronchery mov w6,v0.s[0] 1396*4757b351SPierre Pronchery mov w7,v1.s[0] 1397*4757b351SPierre Pronchery mov w9,v2.s[0] 1398*4757b351SPierre Pronchery add w7,w6,w7 1399*4757b351SPierre Pronchery mov w6,v3.s[0] 1400*4757b351SPierre Pronchery add w7,w7,w9 1401*4757b351SPierre Pronchery add w7,w7,w6 1402*4757b351SPierre Pronchery 1403*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1404*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 
1405*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1406*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1407*4757b351SPierre Pronchery eor w12,w12,w6 1408*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 1409*4757b351SPierre Pronchery eor w6,w14,w15 1410*4757b351SPierre Pronchery eor w9,w12,w8 1411*4757b351SPierre Pronchery eor w6,w6,w9 1412*4757b351SPierre Pronchery movi v1.16b,#64 1413*4757b351SPierre Pronchery movi v2.16b,#128 1414*4757b351SPierre Pronchery movi v3.16b,#192 1415*4757b351SPierre Pronchery mov v0.s[0],w6 1416*4757b351SPierre Pronchery 1417*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1418*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1419*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1420*4757b351SPierre Pronchery 1421*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1422*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1423*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1424*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1425*4757b351SPierre Pronchery 1426*4757b351SPierre Pronchery mov w6,v0.s[0] 1427*4757b351SPierre Pronchery mov w7,v1.s[0] 1428*4757b351SPierre Pronchery mov w9,v2.s[0] 1429*4757b351SPierre Pronchery add w7,w6,w7 1430*4757b351SPierre Pronchery mov w6,v3.s[0] 1431*4757b351SPierre Pronchery add w7,w7,w9 1432*4757b351SPierre Pronchery add w7,w7,w6 1433*4757b351SPierre Pronchery 1434*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1435*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1436*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1437*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1438*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1439*4757b351SPierre Pronchery eor w13,w13,w6 1440*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 1441*4757b351SPierre Pronchery eor w6,w12,w13 1442*4757b351SPierre Pronchery eor w9,w7,w15 1443*4757b351SPierre Pronchery eor w6,w6,w9 
1444*4757b351SPierre Pronchery movi v1.16b,#64 1445*4757b351SPierre Pronchery movi v2.16b,#128 1446*4757b351SPierre Pronchery movi v3.16b,#192 1447*4757b351SPierre Pronchery mov v0.s[0],w6 1448*4757b351SPierre Pronchery 1449*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1450*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1451*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1452*4757b351SPierre Pronchery 1453*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1454*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1455*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1456*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1457*4757b351SPierre Pronchery 1458*4757b351SPierre Pronchery mov w6,v0.s[0] 1459*4757b351SPierre Pronchery mov w7,v1.s[0] 1460*4757b351SPierre Pronchery mov w9,v2.s[0] 1461*4757b351SPierre Pronchery add w7,w6,w7 1462*4757b351SPierre Pronchery mov w6,v3.s[0] 1463*4757b351SPierre Pronchery add w7,w7,w9 1464*4757b351SPierre Pronchery add w7,w7,w6 1465*4757b351SPierre Pronchery 1466*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1467*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1468*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1469*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1470*4757b351SPierre Pronchery eor w14,w14,w6 1471*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 1472*4757b351SPierre Pronchery eor w6,w12,w13 1473*4757b351SPierre Pronchery eor w9,w14,w8 1474*4757b351SPierre Pronchery eor w6,w6,w9 1475*4757b351SPierre Pronchery movi v1.16b,#64 1476*4757b351SPierre Pronchery movi v2.16b,#128 1477*4757b351SPierre Pronchery movi v3.16b,#192 1478*4757b351SPierre Pronchery mov v0.s[0],w6 1479*4757b351SPierre Pronchery 1480*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1481*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1482*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 
1483*4757b351SPierre Pronchery 1484*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1485*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1486*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1487*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1488*4757b351SPierre Pronchery 1489*4757b351SPierre Pronchery mov w6,v0.s[0] 1490*4757b351SPierre Pronchery mov w7,v1.s[0] 1491*4757b351SPierre Pronchery mov w9,v2.s[0] 1492*4757b351SPierre Pronchery add w7,w6,w7 1493*4757b351SPierre Pronchery mov w6,v3.s[0] 1494*4757b351SPierre Pronchery add w7,w7,w9 1495*4757b351SPierre Pronchery add w7,w7,w6 1496*4757b351SPierre Pronchery 1497*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1498*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1499*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1500*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1501*4757b351SPierre Pronchery eor w15,w15,w6 1502*4757b351SPierre Pronchery subs w11,w11,#1 1503*4757b351SPierre Pronchery b.ne 10b 1504*4757b351SPierre Pronchery mov v5.s[0],w15 1505*4757b351SPierre Pronchery mov v5.s[1],w14 1506*4757b351SPierre Pronchery mov v5.s[2],w13 1507*4757b351SPierre Pronchery mov v5.s[3],w12 1508*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1509*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 1510*4757b351SPierre Pronchery#endif 1511*4757b351SPierre Pronchery eor v6.16b,v6.16b,v5.16b 1512*4757b351SPierre Pronchery mov x10,x3 1513*4757b351SPierre Pronchery mov w11,#8 1514*4757b351SPierre Pronchery mov w12,v6.s[0] 1515*4757b351SPierre Pronchery mov w13,v6.s[1] 1516*4757b351SPierre Pronchery mov w14,v6.s[2] 1517*4757b351SPierre Pronchery mov w15,v6.s[3] 1518*4757b351SPierre Pronchery10: 1519*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1520*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 1521*4757b351SPierre Pronchery eor w6,w14,w15 1522*4757b351SPierre Pronchery eor w9,w7,w13 
1523*4757b351SPierre Pronchery eor w6,w6,w9 1524*4757b351SPierre Pronchery movi v1.16b,#64 1525*4757b351SPierre Pronchery movi v2.16b,#128 1526*4757b351SPierre Pronchery movi v3.16b,#192 1527*4757b351SPierre Pronchery mov v0.s[0],w6 1528*4757b351SPierre Pronchery 1529*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1530*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1531*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1532*4757b351SPierre Pronchery 1533*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1534*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1535*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1536*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1537*4757b351SPierre Pronchery 1538*4757b351SPierre Pronchery mov w6,v0.s[0] 1539*4757b351SPierre Pronchery mov w7,v1.s[0] 1540*4757b351SPierre Pronchery mov w9,v2.s[0] 1541*4757b351SPierre Pronchery add w7,w6,w7 1542*4757b351SPierre Pronchery mov w6,v3.s[0] 1543*4757b351SPierre Pronchery add w7,w7,w9 1544*4757b351SPierre Pronchery add w7,w7,w6 1545*4757b351SPierre Pronchery 1546*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1547*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1548*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1549*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1550*4757b351SPierre Pronchery eor w12,w12,w6 1551*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 1552*4757b351SPierre Pronchery eor w6,w14,w15 1553*4757b351SPierre Pronchery eor w9,w12,w8 1554*4757b351SPierre Pronchery eor w6,w6,w9 1555*4757b351SPierre Pronchery movi v1.16b,#64 1556*4757b351SPierre Pronchery movi v2.16b,#128 1557*4757b351SPierre Pronchery movi v3.16b,#192 1558*4757b351SPierre Pronchery mov v0.s[0],w6 1559*4757b351SPierre Pronchery 1560*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1561*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1562*4757b351SPierre 
Pronchery sub v3.16b,v0.16b,v3.16b 1563*4757b351SPierre Pronchery 1564*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1565*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1566*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1567*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1568*4757b351SPierre Pronchery 1569*4757b351SPierre Pronchery mov w6,v0.s[0] 1570*4757b351SPierre Pronchery mov w7,v1.s[0] 1571*4757b351SPierre Pronchery mov w9,v2.s[0] 1572*4757b351SPierre Pronchery add w7,w6,w7 1573*4757b351SPierre Pronchery mov w6,v3.s[0] 1574*4757b351SPierre Pronchery add w7,w7,w9 1575*4757b351SPierre Pronchery add w7,w7,w6 1576*4757b351SPierre Pronchery 1577*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1578*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1579*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1580*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1581*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1582*4757b351SPierre Pronchery eor w13,w13,w6 1583*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 1584*4757b351SPierre Pronchery eor w6,w12,w13 1585*4757b351SPierre Pronchery eor w9,w7,w15 1586*4757b351SPierre Pronchery eor w6,w6,w9 1587*4757b351SPierre Pronchery movi v1.16b,#64 1588*4757b351SPierre Pronchery movi v2.16b,#128 1589*4757b351SPierre Pronchery movi v3.16b,#192 1590*4757b351SPierre Pronchery mov v0.s[0],w6 1591*4757b351SPierre Pronchery 1592*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1593*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1594*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1595*4757b351SPierre Pronchery 1596*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1597*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1598*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1599*4757b351SPierre Pronchery tbl 
v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1600*4757b351SPierre Pronchery 1601*4757b351SPierre Pronchery mov w6,v0.s[0] 1602*4757b351SPierre Pronchery mov w7,v1.s[0] 1603*4757b351SPierre Pronchery mov w9,v2.s[0] 1604*4757b351SPierre Pronchery add w7,w6,w7 1605*4757b351SPierre Pronchery mov w6,v3.s[0] 1606*4757b351SPierre Pronchery add w7,w7,w9 1607*4757b351SPierre Pronchery add w7,w7,w6 1608*4757b351SPierre Pronchery 1609*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1610*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1611*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1612*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1613*4757b351SPierre Pronchery eor w14,w14,w6 1614*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 1615*4757b351SPierre Pronchery eor w6,w12,w13 1616*4757b351SPierre Pronchery eor w9,w14,w8 1617*4757b351SPierre Pronchery eor w6,w6,w9 1618*4757b351SPierre Pronchery movi v1.16b,#64 1619*4757b351SPierre Pronchery movi v2.16b,#128 1620*4757b351SPierre Pronchery movi v3.16b,#192 1621*4757b351SPierre Pronchery mov v0.s[0],w6 1622*4757b351SPierre Pronchery 1623*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1624*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1625*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1626*4757b351SPierre Pronchery 1627*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1628*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1629*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1630*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1631*4757b351SPierre Pronchery 1632*4757b351SPierre Pronchery mov w6,v0.s[0] 1633*4757b351SPierre Pronchery mov w7,v1.s[0] 1634*4757b351SPierre Pronchery mov w9,v2.s[0] 1635*4757b351SPierre Pronchery add w7,w6,w7 1636*4757b351SPierre Pronchery mov w6,v3.s[0] 1637*4757b351SPierre Pronchery add w7,w7,w9 1638*4757b351SPierre Pronchery add w7,w7,w6 
1639*4757b351SPierre Pronchery 1640*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1641*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1642*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1643*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1644*4757b351SPierre Pronchery eor w15,w15,w6 1645*4757b351SPierre Pronchery subs w11,w11,#1 1646*4757b351SPierre Pronchery b.ne 10b 1647*4757b351SPierre Pronchery mov v6.s[0],w15 1648*4757b351SPierre Pronchery mov v6.s[1],w14 1649*4757b351SPierre Pronchery mov v6.s[2],w13 1650*4757b351SPierre Pronchery mov v6.s[3],w12 1651*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1652*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 1653*4757b351SPierre Pronchery#endif 1654*4757b351SPierre Pronchery eor v7.16b,v7.16b,v6.16b 1655*4757b351SPierre Pronchery mov x10,x3 1656*4757b351SPierre Pronchery mov w11,#8 1657*4757b351SPierre Pronchery mov w12,v7.s[0] 1658*4757b351SPierre Pronchery mov w13,v7.s[1] 1659*4757b351SPierre Pronchery mov w14,v7.s[2] 1660*4757b351SPierre Pronchery mov w15,v7.s[3] 1661*4757b351SPierre Pronchery10: 1662*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1663*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 1664*4757b351SPierre Pronchery eor w6,w14,w15 1665*4757b351SPierre Pronchery eor w9,w7,w13 1666*4757b351SPierre Pronchery eor w6,w6,w9 1667*4757b351SPierre Pronchery movi v1.16b,#64 1668*4757b351SPierre Pronchery movi v2.16b,#128 1669*4757b351SPierre Pronchery movi v3.16b,#192 1670*4757b351SPierre Pronchery mov v0.s[0],w6 1671*4757b351SPierre Pronchery 1672*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1673*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1674*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1675*4757b351SPierre Pronchery 1676*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1677*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1678*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 
1679*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1680*4757b351SPierre Pronchery 1681*4757b351SPierre Pronchery mov w6,v0.s[0] 1682*4757b351SPierre Pronchery mov w7,v1.s[0] 1683*4757b351SPierre Pronchery mov w9,v2.s[0] 1684*4757b351SPierre Pronchery add w7,w6,w7 1685*4757b351SPierre Pronchery mov w6,v3.s[0] 1686*4757b351SPierre Pronchery add w7,w7,w9 1687*4757b351SPierre Pronchery add w7,w7,w6 1688*4757b351SPierre Pronchery 1689*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1690*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1691*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1692*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1693*4757b351SPierre Pronchery eor w12,w12,w6 1694*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 1695*4757b351SPierre Pronchery eor w6,w14,w15 1696*4757b351SPierre Pronchery eor w9,w12,w8 1697*4757b351SPierre Pronchery eor w6,w6,w9 1698*4757b351SPierre Pronchery movi v1.16b,#64 1699*4757b351SPierre Pronchery movi v2.16b,#128 1700*4757b351SPierre Pronchery movi v3.16b,#192 1701*4757b351SPierre Pronchery mov v0.s[0],w6 1702*4757b351SPierre Pronchery 1703*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1704*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1705*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1706*4757b351SPierre Pronchery 1707*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1708*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1709*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1710*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1711*4757b351SPierre Pronchery 1712*4757b351SPierre Pronchery mov w6,v0.s[0] 1713*4757b351SPierre Pronchery mov w7,v1.s[0] 1714*4757b351SPierre Pronchery mov w9,v2.s[0] 1715*4757b351SPierre Pronchery add w7,w6,w7 1716*4757b351SPierre Pronchery mov w6,v3.s[0] 1717*4757b351SPierre Pronchery add w7,w7,w9 
1718*4757b351SPierre Pronchery add w7,w7,w6 1719*4757b351SPierre Pronchery 1720*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1721*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1722*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1723*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1724*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1725*4757b351SPierre Pronchery eor w13,w13,w6 1726*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 1727*4757b351SPierre Pronchery eor w6,w12,w13 1728*4757b351SPierre Pronchery eor w9,w7,w15 1729*4757b351SPierre Pronchery eor w6,w6,w9 1730*4757b351SPierre Pronchery movi v1.16b,#64 1731*4757b351SPierre Pronchery movi v2.16b,#128 1732*4757b351SPierre Pronchery movi v3.16b,#192 1733*4757b351SPierre Pronchery mov v0.s[0],w6 1734*4757b351SPierre Pronchery 1735*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1736*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1737*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1738*4757b351SPierre Pronchery 1739*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1740*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1741*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1742*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1743*4757b351SPierre Pronchery 1744*4757b351SPierre Pronchery mov w6,v0.s[0] 1745*4757b351SPierre Pronchery mov w7,v1.s[0] 1746*4757b351SPierre Pronchery mov w9,v2.s[0] 1747*4757b351SPierre Pronchery add w7,w6,w7 1748*4757b351SPierre Pronchery mov w6,v3.s[0] 1749*4757b351SPierre Pronchery add w7,w7,w9 1750*4757b351SPierre Pronchery add w7,w7,w6 1751*4757b351SPierre Pronchery 1752*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1753*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1754*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1755*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1756*4757b351SPierre Pronchery eor w14,w14,w6 
1757*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 1758*4757b351SPierre Pronchery eor w6,w12,w13 1759*4757b351SPierre Pronchery eor w9,w14,w8 1760*4757b351SPierre Pronchery eor w6,w6,w9 1761*4757b351SPierre Pronchery movi v1.16b,#64 1762*4757b351SPierre Pronchery movi v2.16b,#128 1763*4757b351SPierre Pronchery movi v3.16b,#192 1764*4757b351SPierre Pronchery mov v0.s[0],w6 1765*4757b351SPierre Pronchery 1766*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1767*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1768*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1769*4757b351SPierre Pronchery 1770*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1771*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1772*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1773*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1774*4757b351SPierre Pronchery 1775*4757b351SPierre Pronchery mov w6,v0.s[0] 1776*4757b351SPierre Pronchery mov w7,v1.s[0] 1777*4757b351SPierre Pronchery mov w9,v2.s[0] 1778*4757b351SPierre Pronchery add w7,w6,w7 1779*4757b351SPierre Pronchery mov w6,v3.s[0] 1780*4757b351SPierre Pronchery add w7,w7,w9 1781*4757b351SPierre Pronchery add w7,w7,w6 1782*4757b351SPierre Pronchery 1783*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1784*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1785*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1786*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1787*4757b351SPierre Pronchery eor w15,w15,w6 1788*4757b351SPierre Pronchery subs w11,w11,#1 1789*4757b351SPierre Pronchery b.ne 10b 1790*4757b351SPierre Pronchery mov v7.s[0],w15 1791*4757b351SPierre Pronchery mov v7.s[1],w14 1792*4757b351SPierre Pronchery mov v7.s[2],w13 1793*4757b351SPierre Pronchery mov v7.s[3],w12 1794*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1795*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 1796*4757b351SPierre 
Pronchery#endif 1797*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1798*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 1799*4757b351SPierre Pronchery#endif 1800*4757b351SPierre Pronchery orr v3.16b,v7.16b,v7.16b 1801*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 1802*4757b351SPierre Pronchery subs w2,w2,#4 1803*4757b351SPierre Pronchery b.ne .Lcbc_4_blocks_enc 1804*4757b351SPierre Pronchery b 2f 1805*4757b351SPierre Pronchery1: 1806*4757b351SPierre Pronchery subs w2,w2,#1 1807*4757b351SPierre Pronchery b.lt 2f 1808*4757b351SPierre Pronchery ld1 {v4.4s},[x0],#16 1809*4757b351SPierre Pronchery eor v3.16b,v3.16b,v4.16b 1810*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1811*4757b351SPierre Pronchery rev32 v3.16b,v3.16b 1812*4757b351SPierre Pronchery#endif 1813*4757b351SPierre Pronchery mov x10,x3 1814*4757b351SPierre Pronchery mov w11,#8 1815*4757b351SPierre Pronchery mov w12,v3.s[0] 1816*4757b351SPierre Pronchery mov w13,v3.s[1] 1817*4757b351SPierre Pronchery mov w14,v3.s[2] 1818*4757b351SPierre Pronchery mov w15,v3.s[3] 1819*4757b351SPierre Pronchery10: 1820*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1821*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 1822*4757b351SPierre Pronchery eor w6,w14,w15 1823*4757b351SPierre Pronchery eor w9,w7,w13 1824*4757b351SPierre Pronchery eor w6,w6,w9 1825*4757b351SPierre Pronchery movi v1.16b,#64 1826*4757b351SPierre Pronchery movi v2.16b,#128 1827*4757b351SPierre Pronchery movi v3.16b,#192 1828*4757b351SPierre Pronchery mov v0.s[0],w6 1829*4757b351SPierre Pronchery 1830*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1831*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1832*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1833*4757b351SPierre Pronchery 1834*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1835*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1836*4757b351SPierre Pronchery tbl 
v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1837*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1838*4757b351SPierre Pronchery 1839*4757b351SPierre Pronchery mov w6,v0.s[0] 1840*4757b351SPierre Pronchery mov w7,v1.s[0] 1841*4757b351SPierre Pronchery mov w9,v2.s[0] 1842*4757b351SPierre Pronchery add w7,w6,w7 1843*4757b351SPierre Pronchery mov w6,v3.s[0] 1844*4757b351SPierre Pronchery add w7,w7,w9 1845*4757b351SPierre Pronchery add w7,w7,w6 1846*4757b351SPierre Pronchery 1847*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1848*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1849*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1850*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1851*4757b351SPierre Pronchery eor w12,w12,w6 1852*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 1853*4757b351SPierre Pronchery eor w6,w14,w15 1854*4757b351SPierre Pronchery eor w9,w12,w8 1855*4757b351SPierre Pronchery eor w6,w6,w9 1856*4757b351SPierre Pronchery movi v1.16b,#64 1857*4757b351SPierre Pronchery movi v2.16b,#128 1858*4757b351SPierre Pronchery movi v3.16b,#192 1859*4757b351SPierre Pronchery mov v0.s[0],w6 1860*4757b351SPierre Pronchery 1861*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1862*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1863*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1864*4757b351SPierre Pronchery 1865*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1866*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1867*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1868*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1869*4757b351SPierre Pronchery 1870*4757b351SPierre Pronchery mov w6,v0.s[0] 1871*4757b351SPierre Pronchery mov w7,v1.s[0] 1872*4757b351SPierre Pronchery mov w9,v2.s[0] 1873*4757b351SPierre Pronchery add w7,w6,w7 1874*4757b351SPierre Pronchery mov w6,v3.s[0] 
1875*4757b351SPierre Pronchery add w7,w7,w9 1876*4757b351SPierre Pronchery add w7,w7,w6 1877*4757b351SPierre Pronchery 1878*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1879*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1880*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1881*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1882*4757b351SPierre Pronchery ldp w7,w8,[x10],8 1883*4757b351SPierre Pronchery eor w13,w13,w6 1884*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 1885*4757b351SPierre Pronchery eor w6,w12,w13 1886*4757b351SPierre Pronchery eor w9,w7,w15 1887*4757b351SPierre Pronchery eor w6,w6,w9 1888*4757b351SPierre Pronchery movi v1.16b,#64 1889*4757b351SPierre Pronchery movi v2.16b,#128 1890*4757b351SPierre Pronchery movi v3.16b,#192 1891*4757b351SPierre Pronchery mov v0.s[0],w6 1892*4757b351SPierre Pronchery 1893*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1894*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1895*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1896*4757b351SPierre Pronchery 1897*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1898*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1899*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1900*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1901*4757b351SPierre Pronchery 1902*4757b351SPierre Pronchery mov w6,v0.s[0] 1903*4757b351SPierre Pronchery mov w7,v1.s[0] 1904*4757b351SPierre Pronchery mov w9,v2.s[0] 1905*4757b351SPierre Pronchery add w7,w6,w7 1906*4757b351SPierre Pronchery mov w6,v3.s[0] 1907*4757b351SPierre Pronchery add w7,w7,w9 1908*4757b351SPierre Pronchery add w7,w7,w6 1909*4757b351SPierre Pronchery 1910*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1911*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1912*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1913*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 
1914*4757b351SPierre Pronchery eor w14,w14,w6 1915*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 1916*4757b351SPierre Pronchery eor w6,w12,w13 1917*4757b351SPierre Pronchery eor w9,w14,w8 1918*4757b351SPierre Pronchery eor w6,w6,w9 1919*4757b351SPierre Pronchery movi v1.16b,#64 1920*4757b351SPierre Pronchery movi v2.16b,#128 1921*4757b351SPierre Pronchery movi v3.16b,#192 1922*4757b351SPierre Pronchery mov v0.s[0],w6 1923*4757b351SPierre Pronchery 1924*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 1925*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 1926*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 1927*4757b351SPierre Pronchery 1928*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 1929*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 1930*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 1931*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 1932*4757b351SPierre Pronchery 1933*4757b351SPierre Pronchery mov w6,v0.s[0] 1934*4757b351SPierre Pronchery mov w7,v1.s[0] 1935*4757b351SPierre Pronchery mov w9,v2.s[0] 1936*4757b351SPierre Pronchery add w7,w6,w7 1937*4757b351SPierre Pronchery mov w6,v3.s[0] 1938*4757b351SPierre Pronchery add w7,w7,w9 1939*4757b351SPierre Pronchery add w7,w7,w6 1940*4757b351SPierre Pronchery 1941*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 1942*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 1943*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 1944*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 1945*4757b351SPierre Pronchery eor w15,w15,w6 1946*4757b351SPierre Pronchery subs w11,w11,#1 1947*4757b351SPierre Pronchery b.ne 10b 1948*4757b351SPierre Pronchery mov v3.s[0],w15 1949*4757b351SPierre Pronchery mov v3.s[1],w14 1950*4757b351SPierre Pronchery mov v3.s[2],w13 1951*4757b351SPierre Pronchery mov v3.s[3],w12 1952*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1953*4757b351SPierre Pronchery 
rev32 v3.16b,v3.16b 1954*4757b351SPierre Pronchery#endif 1955*4757b351SPierre Pronchery st1 {v3.4s},[x1],#16 1956*4757b351SPierre Pronchery b 1b 1957*4757b351SPierre Pronchery2: 1958*4757b351SPierre Pronchery // save back IV 1959*4757b351SPierre Pronchery st1 {v3.4s},[x4] 1960*4757b351SPierre Pronchery ret 1961*4757b351SPierre Pronchery 1962*4757b351SPierre Pronchery.Ldec: 1963*4757b351SPierre Pronchery // decryption mode starts 1964*4757b351SPierre Pronchery AARCH64_SIGN_LINK_REGISTER 1965*4757b351SPierre Pronchery stp d8,d9,[sp,#-80]! 1966*4757b351SPierre Pronchery stp d10,d11,[sp,#16] 1967*4757b351SPierre Pronchery stp d12,d13,[sp,#32] 1968*4757b351SPierre Pronchery stp d14,d15,[sp,#48] 1969*4757b351SPierre Pronchery stp x29,x30,[sp,#64] 1970*4757b351SPierre Pronchery.Lcbc_8_blocks_dec: 1971*4757b351SPierre Pronchery cmp w2,#8 1972*4757b351SPierre Pronchery b.lt 1f 1973*4757b351SPierre Pronchery ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0] 1974*4757b351SPierre Pronchery add x10,x0,#64 1975*4757b351SPierre Pronchery ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x10] 1976*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1977*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 1978*4757b351SPierre Pronchery#endif 1979*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1980*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 1981*4757b351SPierre Pronchery#endif 1982*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1983*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 1984*4757b351SPierre Pronchery#endif 1985*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1986*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 1987*4757b351SPierre Pronchery#endif 1988*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1989*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 1990*4757b351SPierre Pronchery#endif 1991*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1992*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 1993*4757b351SPierre Pronchery#endif 1994*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1995*4757b351SPierre Pronchery rev32 
v10.16b,v10.16b 1996*4757b351SPierre Pronchery#endif 1997*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 1998*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 1999*4757b351SPierre Pronchery#endif 2000*4757b351SPierre Pronchery bl _vpsm4_enc_8blks 2001*4757b351SPierre Pronchery zip1 v8.4s,v0.4s,v1.4s 2002*4757b351SPierre Pronchery zip2 v9.4s,v0.4s,v1.4s 2003*4757b351SPierre Pronchery zip1 v10.4s,v2.4s,v3.4s 2004*4757b351SPierre Pronchery zip2 v11.4s,v2.4s,v3.4s 2005*4757b351SPierre Pronchery zip1 v0.2d,v8.2d,v10.2d 2006*4757b351SPierre Pronchery zip2 v1.2d,v8.2d,v10.2d 2007*4757b351SPierre Pronchery zip1 v2.2d,v9.2d,v11.2d 2008*4757b351SPierre Pronchery zip2 v3.2d,v9.2d,v11.2d 2009*4757b351SPierre Pronchery zip1 v8.4s,v4.4s,v5.4s 2010*4757b351SPierre Pronchery zip2 v9.4s,v4.4s,v5.4s 2011*4757b351SPierre Pronchery zip1 v10.4s,v6.4s,v7.4s 2012*4757b351SPierre Pronchery zip2 v11.4s,v6.4s,v7.4s 2013*4757b351SPierre Pronchery zip1 v4.2d,v8.2d,v10.2d 2014*4757b351SPierre Pronchery zip2 v5.2d,v8.2d,v10.2d 2015*4757b351SPierre Pronchery zip1 v6.2d,v9.2d,v11.2d 2016*4757b351SPierre Pronchery zip2 v7.2d,v9.2d,v11.2d 2017*4757b351SPierre Pronchery ld1 {v15.4s},[x4] 2018*4757b351SPierre Pronchery ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 2019*4757b351SPierre Pronchery // note ivec1 and vtmpx[3] are reusing the same register 2020*4757b351SPierre Pronchery // care needs to be taken to avoid conflict 2021*4757b351SPierre Pronchery eor v0.16b,v0.16b,v15.16b 2022*4757b351SPierre Pronchery ld1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 2023*4757b351SPierre Pronchery eor v1.16b,v1.16b,v8.16b 2024*4757b351SPierre Pronchery eor v2.16b,v2.16b,v9.16b 2025*4757b351SPierre Pronchery eor v3.16b,v3.16b,v10.16b 2026*4757b351SPierre Pronchery // save back IV 2027*4757b351SPierre Pronchery st1 {v15.4s}, [x4] 2028*4757b351SPierre Pronchery eor v4.16b,v4.16b,v11.16b 2029*4757b351SPierre Pronchery eor v5.16b,v5.16b,v12.16b 2030*4757b351SPierre Pronchery eor v6.16b,v6.16b,v13.16b 2031*4757b351SPierre 
Pronchery eor v7.16b,v7.16b,v14.16b 2032*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 2033*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 2034*4757b351SPierre Pronchery subs w2,w2,#8 2035*4757b351SPierre Pronchery b.gt .Lcbc_8_blocks_dec 2036*4757b351SPierre Pronchery b.eq 100f 2037*4757b351SPierre Pronchery1: 2038*4757b351SPierre Pronchery ld1 {v15.4s},[x4] 2039*4757b351SPierre Pronchery.Lcbc_4_blocks_dec: 2040*4757b351SPierre Pronchery cmp w2,#4 2041*4757b351SPierre Pronchery b.lt 1f 2042*4757b351SPierre Pronchery ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0] 2043*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2044*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 2045*4757b351SPierre Pronchery#endif 2046*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2047*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 2048*4757b351SPierre Pronchery#endif 2049*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2050*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 2051*4757b351SPierre Pronchery#endif 2052*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2053*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 2054*4757b351SPierre Pronchery#endif 2055*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 2056*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 2057*4757b351SPierre Pronchery zip1 v8.4s,v0.4s,v1.4s 2058*4757b351SPierre Pronchery zip2 v9.4s,v0.4s,v1.4s 2059*4757b351SPierre Pronchery zip1 v10.4s,v2.4s,v3.4s 2060*4757b351SPierre Pronchery zip2 v11.4s,v2.4s,v3.4s 2061*4757b351SPierre Pronchery zip1 v0.2d,v8.2d,v10.2d 2062*4757b351SPierre Pronchery zip2 v1.2d,v8.2d,v10.2d 2063*4757b351SPierre Pronchery zip1 v2.2d,v9.2d,v11.2d 2064*4757b351SPierre Pronchery zip2 v3.2d,v9.2d,v11.2d 2065*4757b351SPierre Pronchery eor v0.16b,v0.16b,v15.16b 2066*4757b351SPierre Pronchery eor v1.16b,v1.16b,v4.16b 2067*4757b351SPierre Pronchery orr v15.16b,v7.16b,v7.16b 2068*4757b351SPierre Pronchery eor v2.16b,v2.16b,v5.16b 2069*4757b351SPierre Pronchery eor v3.16b,v3.16b,v6.16b 
2070*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 2071*4757b351SPierre Pronchery subs w2,w2,#4 2072*4757b351SPierre Pronchery b.gt .Lcbc_4_blocks_dec 2073*4757b351SPierre Pronchery // save back IV 2074*4757b351SPierre Pronchery st1 {v7.4s}, [x4] 2075*4757b351SPierre Pronchery b 100f 2076*4757b351SPierre Pronchery1: // last block 2077*4757b351SPierre Pronchery subs w2,w2,#1 2078*4757b351SPierre Pronchery b.lt 100f 2079*4757b351SPierre Pronchery b.gt 1f 2080*4757b351SPierre Pronchery ld1 {v4.4s},[x0],#16 2081*4757b351SPierre Pronchery // save back IV 2082*4757b351SPierre Pronchery st1 {v4.4s}, [x4] 2083*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2084*4757b351SPierre Pronchery rev32 v8.16b,v4.16b 2085*4757b351SPierre Pronchery#else 2086*4757b351SPierre Pronchery mov v8.16b,v4.16b 2087*4757b351SPierre Pronchery#endif 2088*4757b351SPierre Pronchery mov x10,x3 2089*4757b351SPierre Pronchery mov w11,#8 2090*4757b351SPierre Pronchery mov w12,v8.s[0] 2091*4757b351SPierre Pronchery mov w13,v8.s[1] 2092*4757b351SPierre Pronchery mov w14,v8.s[2] 2093*4757b351SPierre Pronchery mov w15,v8.s[3] 2094*4757b351SPierre Pronchery10: 2095*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2096*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 2097*4757b351SPierre Pronchery eor w6,w14,w15 2098*4757b351SPierre Pronchery eor w9,w7,w13 2099*4757b351SPierre Pronchery eor w6,w6,w9 2100*4757b351SPierre Pronchery movi v1.16b,#64 2101*4757b351SPierre Pronchery movi v2.16b,#128 2102*4757b351SPierre Pronchery movi v3.16b,#192 2103*4757b351SPierre Pronchery mov v0.s[0],w6 2104*4757b351SPierre Pronchery 2105*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2106*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2107*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2108*4757b351SPierre Pronchery 2109*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2110*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 
2111*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2112*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2113*4757b351SPierre Pronchery 2114*4757b351SPierre Pronchery mov w6,v0.s[0] 2115*4757b351SPierre Pronchery mov w7,v1.s[0] 2116*4757b351SPierre Pronchery mov w9,v2.s[0] 2117*4757b351SPierre Pronchery add w7,w6,w7 2118*4757b351SPierre Pronchery mov w6,v3.s[0] 2119*4757b351SPierre Pronchery add w7,w7,w9 2120*4757b351SPierre Pronchery add w7,w7,w6 2121*4757b351SPierre Pronchery 2122*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2123*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2124*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2125*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2126*4757b351SPierre Pronchery eor w12,w12,w6 2127*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 2128*4757b351SPierre Pronchery eor w6,w14,w15 2129*4757b351SPierre Pronchery eor w9,w12,w8 2130*4757b351SPierre Pronchery eor w6,w6,w9 2131*4757b351SPierre Pronchery movi v1.16b,#64 2132*4757b351SPierre Pronchery movi v2.16b,#128 2133*4757b351SPierre Pronchery movi v3.16b,#192 2134*4757b351SPierre Pronchery mov v0.s[0],w6 2135*4757b351SPierre Pronchery 2136*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2137*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2138*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2139*4757b351SPierre Pronchery 2140*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2141*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2142*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2143*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2144*4757b351SPierre Pronchery 2145*4757b351SPierre Pronchery mov w6,v0.s[0] 2146*4757b351SPierre Pronchery mov w7,v1.s[0] 2147*4757b351SPierre Pronchery mov w9,v2.s[0] 2148*4757b351SPierre Pronchery add w7,w6,w7 2149*4757b351SPierre 
Pronchery mov w6,v3.s[0] 2150*4757b351SPierre Pronchery add w7,w7,w9 2151*4757b351SPierre Pronchery add w7,w7,w6 2152*4757b351SPierre Pronchery 2153*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2154*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2155*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2156*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2157*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2158*4757b351SPierre Pronchery eor w13,w13,w6 2159*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 2160*4757b351SPierre Pronchery eor w6,w12,w13 2161*4757b351SPierre Pronchery eor w9,w7,w15 2162*4757b351SPierre Pronchery eor w6,w6,w9 2163*4757b351SPierre Pronchery movi v1.16b,#64 2164*4757b351SPierre Pronchery movi v2.16b,#128 2165*4757b351SPierre Pronchery movi v3.16b,#192 2166*4757b351SPierre Pronchery mov v0.s[0],w6 2167*4757b351SPierre Pronchery 2168*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2169*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2170*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2171*4757b351SPierre Pronchery 2172*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2173*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2174*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2175*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2176*4757b351SPierre Pronchery 2177*4757b351SPierre Pronchery mov w6,v0.s[0] 2178*4757b351SPierre Pronchery mov w7,v1.s[0] 2179*4757b351SPierre Pronchery mov w9,v2.s[0] 2180*4757b351SPierre Pronchery add w7,w6,w7 2181*4757b351SPierre Pronchery mov w6,v3.s[0] 2182*4757b351SPierre Pronchery add w7,w7,w9 2183*4757b351SPierre Pronchery add w7,w7,w6 2184*4757b351SPierre Pronchery 2185*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2186*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2187*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2188*4757b351SPierre Pronchery eor w6,w6,w7,ror 
#32-24 2189*4757b351SPierre Pronchery eor w14,w14,w6 2190*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 2191*4757b351SPierre Pronchery eor w6,w12,w13 2192*4757b351SPierre Pronchery eor w9,w14,w8 2193*4757b351SPierre Pronchery eor w6,w6,w9 2194*4757b351SPierre Pronchery movi v1.16b,#64 2195*4757b351SPierre Pronchery movi v2.16b,#128 2196*4757b351SPierre Pronchery movi v3.16b,#192 2197*4757b351SPierre Pronchery mov v0.s[0],w6 2198*4757b351SPierre Pronchery 2199*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2200*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2201*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2202*4757b351SPierre Pronchery 2203*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2204*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2205*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2206*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2207*4757b351SPierre Pronchery 2208*4757b351SPierre Pronchery mov w6,v0.s[0] 2209*4757b351SPierre Pronchery mov w7,v1.s[0] 2210*4757b351SPierre Pronchery mov w9,v2.s[0] 2211*4757b351SPierre Pronchery add w7,w6,w7 2212*4757b351SPierre Pronchery mov w6,v3.s[0] 2213*4757b351SPierre Pronchery add w7,w7,w9 2214*4757b351SPierre Pronchery add w7,w7,w6 2215*4757b351SPierre Pronchery 2216*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2217*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2218*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2219*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2220*4757b351SPierre Pronchery eor w15,w15,w6 2221*4757b351SPierre Pronchery subs w11,w11,#1 2222*4757b351SPierre Pronchery b.ne 10b 2223*4757b351SPierre Pronchery mov v8.s[0],w15 2224*4757b351SPierre Pronchery mov v8.s[1],w14 2225*4757b351SPierre Pronchery mov v8.s[2],w13 2226*4757b351SPierre Pronchery mov v8.s[3],w12 2227*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2228*4757b351SPierre 
Pronchery rev32 v8.16b,v8.16b 2229*4757b351SPierre Pronchery#endif 2230*4757b351SPierre Pronchery eor v8.16b,v8.16b,v15.16b 2231*4757b351SPierre Pronchery st1 {v8.4s},[x1],#16 2232*4757b351SPierre Pronchery b 100f 2233*4757b351SPierre Pronchery1: // last two blocks 2234*4757b351SPierre Pronchery ld4 {v4.s,v5.s,v6.s,v7.s}[0],[x0] 2235*4757b351SPierre Pronchery add x10,x0,#16 2236*4757b351SPierre Pronchery ld4 {v4.s,v5.s,v6.s,v7.s}[1],[x10],#16 2237*4757b351SPierre Pronchery subs w2,w2,1 2238*4757b351SPierre Pronchery b.gt 1f 2239*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2240*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 2241*4757b351SPierre Pronchery#endif 2242*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2243*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 2244*4757b351SPierre Pronchery#endif 2245*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2246*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 2247*4757b351SPierre Pronchery#endif 2248*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2249*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 2250*4757b351SPierre Pronchery#endif 2251*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 2252*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s},[x0],#32 2253*4757b351SPierre Pronchery zip1 v8.4s,v0.4s,v1.4s 2254*4757b351SPierre Pronchery zip2 v9.4s,v0.4s,v1.4s 2255*4757b351SPierre Pronchery zip1 v10.4s,v2.4s,v3.4s 2256*4757b351SPierre Pronchery zip2 v11.4s,v2.4s,v3.4s 2257*4757b351SPierre Pronchery zip1 v0.2d,v8.2d,v10.2d 2258*4757b351SPierre Pronchery zip2 v1.2d,v8.2d,v10.2d 2259*4757b351SPierre Pronchery zip1 v2.2d,v9.2d,v11.2d 2260*4757b351SPierre Pronchery zip2 v3.2d,v9.2d,v11.2d 2261*4757b351SPierre Pronchery eor v0.16b,v0.16b,v15.16b 2262*4757b351SPierre Pronchery eor v1.16b,v1.16b,v4.16b 2263*4757b351SPierre Pronchery st1 {v0.4s,v1.4s},[x1],#32 2264*4757b351SPierre Pronchery // save back IV 2265*4757b351SPierre Pronchery st1 {v5.4s}, [x4] 2266*4757b351SPierre Pronchery b 100f 2267*4757b351SPierre Pronchery1: // last 3 blocks 
2268*4757b351SPierre Pronchery ld4 {v4.s,v5.s,v6.s,v7.s}[2],[x10] 2269*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2270*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 2271*4757b351SPierre Pronchery#endif 2272*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2273*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 2274*4757b351SPierre Pronchery#endif 2275*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2276*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 2277*4757b351SPierre Pronchery#endif 2278*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2279*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 2280*4757b351SPierre Pronchery#endif 2281*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 2282*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 2283*4757b351SPierre Pronchery zip1 v8.4s,v0.4s,v1.4s 2284*4757b351SPierre Pronchery zip2 v9.4s,v0.4s,v1.4s 2285*4757b351SPierre Pronchery zip1 v10.4s,v2.4s,v3.4s 2286*4757b351SPierre Pronchery zip2 v11.4s,v2.4s,v3.4s 2287*4757b351SPierre Pronchery zip1 v0.2d,v8.2d,v10.2d 2288*4757b351SPierre Pronchery zip2 v1.2d,v8.2d,v10.2d 2289*4757b351SPierre Pronchery zip1 v2.2d,v9.2d,v11.2d 2290*4757b351SPierre Pronchery zip2 v3.2d,v9.2d,v11.2d 2291*4757b351SPierre Pronchery eor v0.16b,v0.16b,v15.16b 2292*4757b351SPierre Pronchery eor v1.16b,v1.16b,v4.16b 2293*4757b351SPierre Pronchery eor v2.16b,v2.16b,v5.16b 2294*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s},[x1],#48 2295*4757b351SPierre Pronchery // save back IV 2296*4757b351SPierre Pronchery st1 {v6.4s}, [x4] 2297*4757b351SPierre Pronchery100: 2298*4757b351SPierre Pronchery ldp d10,d11,[sp,#16] 2299*4757b351SPierre Pronchery ldp d12,d13,[sp,#32] 2300*4757b351SPierre Pronchery ldp d14,d15,[sp,#48] 2301*4757b351SPierre Pronchery ldp x29,x30,[sp,#64] 2302*4757b351SPierre Pronchery ldp d8,d9,[sp],#80 2303*4757b351SPierre Pronchery AARCH64_VALIDATE_LINK_REGISTER 2304*4757b351SPierre Pronchery ret 2305*4757b351SPierre Pronchery.size vpsm4_cbc_encrypt,.-vpsm4_cbc_encrypt 2306*4757b351SPierre 
Pronchery.globl vpsm4_ctr32_encrypt_blocks 2307*4757b351SPierre Pronchery.type vpsm4_ctr32_encrypt_blocks,%function 2308*4757b351SPierre Pronchery.align 5 2309*4757b351SPierre Proncheryvpsm4_ctr32_encrypt_blocks: 2310*4757b351SPierre Pronchery AARCH64_VALID_CALL_TARGET 2311*4757b351SPierre Pronchery ld1 {v3.4s},[x4] 2312*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2313*4757b351SPierre Pronchery rev32 v3.16b,v3.16b 2314*4757b351SPierre Pronchery#endif 2315*4757b351SPierre Pronchery adrp x10,.Lsbox 2316*4757b351SPierre Pronchery add x10,x10,#:lo12:.Lsbox 2317*4757b351SPierre Pronchery ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 2318*4757b351SPierre Pronchery ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 2319*4757b351SPierre Pronchery ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 2320*4757b351SPierre Pronchery ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] 2321*4757b351SPierre Pronchery cmp w2,#1 2322*4757b351SPierre Pronchery b.ne 1f 2323*4757b351SPierre Pronchery // fast processing for one single block without 2324*4757b351SPierre Pronchery // context saving overhead 2325*4757b351SPierre Pronchery mov x10,x3 2326*4757b351SPierre Pronchery mov w11,#8 2327*4757b351SPierre Pronchery mov w12,v3.s[0] 2328*4757b351SPierre Pronchery mov w13,v3.s[1] 2329*4757b351SPierre Pronchery mov w14,v3.s[2] 2330*4757b351SPierre Pronchery mov w15,v3.s[3] 2331*4757b351SPierre Pronchery10: 2332*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2333*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 2334*4757b351SPierre Pronchery eor w6,w14,w15 2335*4757b351SPierre Pronchery eor w9,w7,w13 2336*4757b351SPierre Pronchery eor w6,w6,w9 2337*4757b351SPierre Pronchery movi v1.16b,#64 2338*4757b351SPierre Pronchery movi v2.16b,#128 2339*4757b351SPierre Pronchery movi v3.16b,#192 2340*4757b351SPierre Pronchery mov v0.s[0],w6 2341*4757b351SPierre Pronchery 2342*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2343*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2344*4757b351SPierre 
Pronchery sub v3.16b,v0.16b,v3.16b 2345*4757b351SPierre Pronchery 2346*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2347*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2348*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2349*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2350*4757b351SPierre Pronchery 2351*4757b351SPierre Pronchery mov w6,v0.s[0] 2352*4757b351SPierre Pronchery mov w7,v1.s[0] 2353*4757b351SPierre Pronchery mov w9,v2.s[0] 2354*4757b351SPierre Pronchery add w7,w6,w7 2355*4757b351SPierre Pronchery mov w6,v3.s[0] 2356*4757b351SPierre Pronchery add w7,w7,w9 2357*4757b351SPierre Pronchery add w7,w7,w6 2358*4757b351SPierre Pronchery 2359*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2360*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2361*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2362*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2363*4757b351SPierre Pronchery eor w12,w12,w6 2364*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 2365*4757b351SPierre Pronchery eor w6,w14,w15 2366*4757b351SPierre Pronchery eor w9,w12,w8 2367*4757b351SPierre Pronchery eor w6,w6,w9 2368*4757b351SPierre Pronchery movi v1.16b,#64 2369*4757b351SPierre Pronchery movi v2.16b,#128 2370*4757b351SPierre Pronchery movi v3.16b,#192 2371*4757b351SPierre Pronchery mov v0.s[0],w6 2372*4757b351SPierre Pronchery 2373*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2374*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2375*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2376*4757b351SPierre Pronchery 2377*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2378*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2379*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2380*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 
2381*4757b351SPierre Pronchery 2382*4757b351SPierre Pronchery mov w6,v0.s[0] 2383*4757b351SPierre Pronchery mov w7,v1.s[0] 2384*4757b351SPierre Pronchery mov w9,v2.s[0] 2385*4757b351SPierre Pronchery add w7,w6,w7 2386*4757b351SPierre Pronchery mov w6,v3.s[0] 2387*4757b351SPierre Pronchery add w7,w7,w9 2388*4757b351SPierre Pronchery add w7,w7,w6 2389*4757b351SPierre Pronchery 2390*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2391*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2392*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2393*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2394*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2395*4757b351SPierre Pronchery eor w13,w13,w6 2396*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 2397*4757b351SPierre Pronchery eor w6,w12,w13 2398*4757b351SPierre Pronchery eor w9,w7,w15 2399*4757b351SPierre Pronchery eor w6,w6,w9 2400*4757b351SPierre Pronchery movi v1.16b,#64 2401*4757b351SPierre Pronchery movi v2.16b,#128 2402*4757b351SPierre Pronchery movi v3.16b,#192 2403*4757b351SPierre Pronchery mov v0.s[0],w6 2404*4757b351SPierre Pronchery 2405*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2406*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2407*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2408*4757b351SPierre Pronchery 2409*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2410*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2411*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2412*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2413*4757b351SPierre Pronchery 2414*4757b351SPierre Pronchery mov w6,v0.s[0] 2415*4757b351SPierre Pronchery mov w7,v1.s[0] 2416*4757b351SPierre Pronchery mov w9,v2.s[0] 2417*4757b351SPierre Pronchery add w7,w6,w7 2418*4757b351SPierre Pronchery mov w6,v3.s[0] 2419*4757b351SPierre Pronchery add w7,w7,w9 2420*4757b351SPierre Pronchery add w7,w7,w6 
2421*4757b351SPierre Pronchery 2422*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2423*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2424*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2425*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2426*4757b351SPierre Pronchery eor w14,w14,w6 2427*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 2428*4757b351SPierre Pronchery eor w6,w12,w13 2429*4757b351SPierre Pronchery eor w9,w14,w8 2430*4757b351SPierre Pronchery eor w6,w6,w9 2431*4757b351SPierre Pronchery movi v1.16b,#64 2432*4757b351SPierre Pronchery movi v2.16b,#128 2433*4757b351SPierre Pronchery movi v3.16b,#192 2434*4757b351SPierre Pronchery mov v0.s[0],w6 2435*4757b351SPierre Pronchery 2436*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2437*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2438*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2439*4757b351SPierre Pronchery 2440*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2441*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2442*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2443*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2444*4757b351SPierre Pronchery 2445*4757b351SPierre Pronchery mov w6,v0.s[0] 2446*4757b351SPierre Pronchery mov w7,v1.s[0] 2447*4757b351SPierre Pronchery mov w9,v2.s[0] 2448*4757b351SPierre Pronchery add w7,w6,w7 2449*4757b351SPierre Pronchery mov w6,v3.s[0] 2450*4757b351SPierre Pronchery add w7,w7,w9 2451*4757b351SPierre Pronchery add w7,w7,w6 2452*4757b351SPierre Pronchery 2453*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2454*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2455*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2456*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2457*4757b351SPierre Pronchery eor w15,w15,w6 2458*4757b351SPierre Pronchery subs w11,w11,#1 2459*4757b351SPierre Pronchery b.ne 10b 2460*4757b351SPierre 
Pronchery mov v3.s[0],w15 2461*4757b351SPierre Pronchery mov v3.s[1],w14 2462*4757b351SPierre Pronchery mov v3.s[2],w13 2463*4757b351SPierre Pronchery mov v3.s[3],w12 2464*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2465*4757b351SPierre Pronchery rev32 v3.16b,v3.16b 2466*4757b351SPierre Pronchery#endif 2467*4757b351SPierre Pronchery ld1 {v4.4s},[x0] 2468*4757b351SPierre Pronchery eor v4.16b,v4.16b,v3.16b 2469*4757b351SPierre Pronchery st1 {v4.4s},[x1] 2470*4757b351SPierre Pronchery ret 2471*4757b351SPierre Pronchery1: 2472*4757b351SPierre Pronchery AARCH64_SIGN_LINK_REGISTER 2473*4757b351SPierre Pronchery stp d8,d9,[sp,#-80]! 2474*4757b351SPierre Pronchery stp d10,d11,[sp,#16] 2475*4757b351SPierre Pronchery stp d12,d13,[sp,#32] 2476*4757b351SPierre Pronchery stp d14,d15,[sp,#48] 2477*4757b351SPierre Pronchery stp x29,x30,[sp,#64] 2478*4757b351SPierre Pronchery mov w12,v3.s[0] 2479*4757b351SPierre Pronchery mov w13,v3.s[1] 2480*4757b351SPierre Pronchery mov w14,v3.s[2] 2481*4757b351SPierre Pronchery mov w5,v3.s[3] 2482*4757b351SPierre Pronchery.Lctr32_4_blocks_process: 2483*4757b351SPierre Pronchery cmp w2,#4 2484*4757b351SPierre Pronchery b.lt 1f 2485*4757b351SPierre Pronchery dup v4.4s,w12 2486*4757b351SPierre Pronchery dup v5.4s,w13 2487*4757b351SPierre Pronchery dup v6.4s,w14 2488*4757b351SPierre Pronchery mov v7.s[0],w5 2489*4757b351SPierre Pronchery add w5,w5,#1 2490*4757b351SPierre Pronchery mov v7.s[1],w5 2491*4757b351SPierre Pronchery add w5,w5,#1 2492*4757b351SPierre Pronchery mov v7.s[2],w5 2493*4757b351SPierre Pronchery add w5,w5,#1 2494*4757b351SPierre Pronchery mov v7.s[3],w5 2495*4757b351SPierre Pronchery add w5,w5,#1 2496*4757b351SPierre Pronchery cmp w2,#8 2497*4757b351SPierre Pronchery b.ge .Lctr32_8_blocks_process 2498*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 2499*4757b351SPierre Pronchery ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 2500*4757b351SPierre Pronchery eor v0.16b,v0.16b,v12.16b 2501*4757b351SPierre Pronchery eor 
v1.16b,v1.16b,v13.16b 2502*4757b351SPierre Pronchery eor v2.16b,v2.16b,v14.16b 2503*4757b351SPierre Pronchery eor v3.16b,v3.16b,v15.16b 2504*4757b351SPierre Pronchery st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 2505*4757b351SPierre Pronchery subs w2,w2,#4 2506*4757b351SPierre Pronchery b.ne .Lctr32_4_blocks_process 2507*4757b351SPierre Pronchery b 100f 2508*4757b351SPierre Pronchery.Lctr32_8_blocks_process: 2509*4757b351SPierre Pronchery dup v8.4s,w12 2510*4757b351SPierre Pronchery dup v9.4s,w13 2511*4757b351SPierre Pronchery dup v10.4s,w14 2512*4757b351SPierre Pronchery mov v11.s[0],w5 2513*4757b351SPierre Pronchery add w5,w5,#1 2514*4757b351SPierre Pronchery mov v11.s[1],w5 2515*4757b351SPierre Pronchery add w5,w5,#1 2516*4757b351SPierre Pronchery mov v11.s[2],w5 2517*4757b351SPierre Pronchery add w5,w5,#1 2518*4757b351SPierre Pronchery mov v11.s[3],w5 2519*4757b351SPierre Pronchery add w5,w5,#1 2520*4757b351SPierre Pronchery bl _vpsm4_enc_8blks 2521*4757b351SPierre Pronchery ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 2522*4757b351SPierre Pronchery ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 2523*4757b351SPierre Pronchery eor v0.16b,v0.16b,v12.16b 2524*4757b351SPierre Pronchery eor v1.16b,v1.16b,v13.16b 2525*4757b351SPierre Pronchery eor v2.16b,v2.16b,v14.16b 2526*4757b351SPierre Pronchery eor v3.16b,v3.16b,v15.16b 2527*4757b351SPierre Pronchery eor v4.16b,v4.16b,v8.16b 2528*4757b351SPierre Pronchery eor v5.16b,v5.16b,v9.16b 2529*4757b351SPierre Pronchery eor v6.16b,v6.16b,v10.16b 2530*4757b351SPierre Pronchery eor v7.16b,v7.16b,v11.16b 2531*4757b351SPierre Pronchery st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 2532*4757b351SPierre Pronchery st4 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 2533*4757b351SPierre Pronchery subs w2,w2,#8 2534*4757b351SPierre Pronchery b.ne .Lctr32_4_blocks_process 2535*4757b351SPierre Pronchery b 100f 2536*4757b351SPierre Pronchery1: // last block processing 2537*4757b351SPierre Pronchery subs w2,w2,#1 2538*4757b351SPierre Pronchery b.lt 100f 
2539*4757b351SPierre Pronchery b.gt 1f 2540*4757b351SPierre Pronchery mov v3.s[0],w12 2541*4757b351SPierre Pronchery mov v3.s[1],w13 2542*4757b351SPierre Pronchery mov v3.s[2],w14 2543*4757b351SPierre Pronchery mov v3.s[3],w5 2544*4757b351SPierre Pronchery mov x10,x3 2545*4757b351SPierre Pronchery mov w11,#8 2546*4757b351SPierre Pronchery mov w12,v3.s[0] 2547*4757b351SPierre Pronchery mov w13,v3.s[1] 2548*4757b351SPierre Pronchery mov w14,v3.s[2] 2549*4757b351SPierre Pronchery mov w15,v3.s[3] 2550*4757b351SPierre Pronchery10: 2551*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2552*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 2553*4757b351SPierre Pronchery eor w6,w14,w15 2554*4757b351SPierre Pronchery eor w9,w7,w13 2555*4757b351SPierre Pronchery eor w6,w6,w9 2556*4757b351SPierre Pronchery movi v1.16b,#64 2557*4757b351SPierre Pronchery movi v2.16b,#128 2558*4757b351SPierre Pronchery movi v3.16b,#192 2559*4757b351SPierre Pronchery mov v0.s[0],w6 2560*4757b351SPierre Pronchery 2561*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2562*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2563*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2564*4757b351SPierre Pronchery 2565*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2566*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2567*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2568*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2569*4757b351SPierre Pronchery 2570*4757b351SPierre Pronchery mov w6,v0.s[0] 2571*4757b351SPierre Pronchery mov w7,v1.s[0] 2572*4757b351SPierre Pronchery mov w9,v2.s[0] 2573*4757b351SPierre Pronchery add w7,w6,w7 2574*4757b351SPierre Pronchery mov w6,v3.s[0] 2575*4757b351SPierre Pronchery add w7,w7,w9 2576*4757b351SPierre Pronchery add w7,w7,w6 2577*4757b351SPierre Pronchery 2578*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2579*4757b351SPierre Pronchery eor 
w6,w6,w7,ror #32-10 2580*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2581*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2582*4757b351SPierre Pronchery eor w12,w12,w6 2583*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 2584*4757b351SPierre Pronchery eor w6,w14,w15 2585*4757b351SPierre Pronchery eor w9,w12,w8 2586*4757b351SPierre Pronchery eor w6,w6,w9 2587*4757b351SPierre Pronchery movi v1.16b,#64 2588*4757b351SPierre Pronchery movi v2.16b,#128 2589*4757b351SPierre Pronchery movi v3.16b,#192 2590*4757b351SPierre Pronchery mov v0.s[0],w6 2591*4757b351SPierre Pronchery 2592*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2593*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2594*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2595*4757b351SPierre Pronchery 2596*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2597*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2598*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2599*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2600*4757b351SPierre Pronchery 2601*4757b351SPierre Pronchery mov w6,v0.s[0] 2602*4757b351SPierre Pronchery mov w7,v1.s[0] 2603*4757b351SPierre Pronchery mov w9,v2.s[0] 2604*4757b351SPierre Pronchery add w7,w6,w7 2605*4757b351SPierre Pronchery mov w6,v3.s[0] 2606*4757b351SPierre Pronchery add w7,w7,w9 2607*4757b351SPierre Pronchery add w7,w7,w6 2608*4757b351SPierre Pronchery 2609*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2610*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2611*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2612*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2613*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2614*4757b351SPierre Pronchery eor w13,w13,w6 2615*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 2616*4757b351SPierre Pronchery eor w6,w12,w13 2617*4757b351SPierre Pronchery eor w9,w7,w15 2618*4757b351SPierre 
Pronchery eor w6,w6,w9 2619*4757b351SPierre Pronchery movi v1.16b,#64 2620*4757b351SPierre Pronchery movi v2.16b,#128 2621*4757b351SPierre Pronchery movi v3.16b,#192 2622*4757b351SPierre Pronchery mov v0.s[0],w6 2623*4757b351SPierre Pronchery 2624*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2625*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2626*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2627*4757b351SPierre Pronchery 2628*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2629*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2630*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2631*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2632*4757b351SPierre Pronchery 2633*4757b351SPierre Pronchery mov w6,v0.s[0] 2634*4757b351SPierre Pronchery mov w7,v1.s[0] 2635*4757b351SPierre Pronchery mov w9,v2.s[0] 2636*4757b351SPierre Pronchery add w7,w6,w7 2637*4757b351SPierre Pronchery mov w6,v3.s[0] 2638*4757b351SPierre Pronchery add w7,w7,w9 2639*4757b351SPierre Pronchery add w7,w7,w6 2640*4757b351SPierre Pronchery 2641*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2642*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2643*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2644*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2645*4757b351SPierre Pronchery eor w14,w14,w6 2646*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 2647*4757b351SPierre Pronchery eor w6,w12,w13 2648*4757b351SPierre Pronchery eor w9,w14,w8 2649*4757b351SPierre Pronchery eor w6,w6,w9 2650*4757b351SPierre Pronchery movi v1.16b,#64 2651*4757b351SPierre Pronchery movi v2.16b,#128 2652*4757b351SPierre Pronchery movi v3.16b,#192 2653*4757b351SPierre Pronchery mov v0.s[0],w6 2654*4757b351SPierre Pronchery 2655*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2656*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2657*4757b351SPierre Pronchery sub 
v3.16b,v0.16b,v3.16b 2658*4757b351SPierre Pronchery 2659*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2660*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2661*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2662*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2663*4757b351SPierre Pronchery 2664*4757b351SPierre Pronchery mov w6,v0.s[0] 2665*4757b351SPierre Pronchery mov w7,v1.s[0] 2666*4757b351SPierre Pronchery mov w9,v2.s[0] 2667*4757b351SPierre Pronchery add w7,w6,w7 2668*4757b351SPierre Pronchery mov w6,v3.s[0] 2669*4757b351SPierre Pronchery add w7,w7,w9 2670*4757b351SPierre Pronchery add w7,w7,w6 2671*4757b351SPierre Pronchery 2672*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2673*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2674*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2675*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2676*4757b351SPierre Pronchery eor w15,w15,w6 2677*4757b351SPierre Pronchery subs w11,w11,#1 2678*4757b351SPierre Pronchery b.ne 10b 2679*4757b351SPierre Pronchery mov v3.s[0],w15 2680*4757b351SPierre Pronchery mov v3.s[1],w14 2681*4757b351SPierre Pronchery mov v3.s[2],w13 2682*4757b351SPierre Pronchery mov v3.s[3],w12 2683*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2684*4757b351SPierre Pronchery rev32 v3.16b,v3.16b 2685*4757b351SPierre Pronchery#endif 2686*4757b351SPierre Pronchery ld1 {v4.4s},[x0] 2687*4757b351SPierre Pronchery eor v4.16b,v4.16b,v3.16b 2688*4757b351SPierre Pronchery st1 {v4.4s},[x1] 2689*4757b351SPierre Pronchery b 100f 2690*4757b351SPierre Pronchery1: // last 2 blocks processing 2691*4757b351SPierre Pronchery dup v4.4s,w12 2692*4757b351SPierre Pronchery dup v5.4s,w13 2693*4757b351SPierre Pronchery dup v6.4s,w14 2694*4757b351SPierre Pronchery mov v7.s[0],w5 2695*4757b351SPierre Pronchery add w5,w5,#1 2696*4757b351SPierre Pronchery mov v7.s[1],w5 2697*4757b351SPierre Pronchery subs 
w2,w2,#1 2698*4757b351SPierre Pronchery b.ne 1f 2699*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 2700*4757b351SPierre Pronchery ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#16 2701*4757b351SPierre Pronchery ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#16 2702*4757b351SPierre Pronchery eor v0.16b,v0.16b,v12.16b 2703*4757b351SPierre Pronchery eor v1.16b,v1.16b,v13.16b 2704*4757b351SPierre Pronchery eor v2.16b,v2.16b,v14.16b 2705*4757b351SPierre Pronchery eor v3.16b,v3.16b,v15.16b 2706*4757b351SPierre Pronchery st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 2707*4757b351SPierre Pronchery st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 2708*4757b351SPierre Pronchery b 100f 2709*4757b351SPierre Pronchery1: // last 3 blocks processing 2710*4757b351SPierre Pronchery add w5,w5,#1 2711*4757b351SPierre Pronchery mov v7.s[2],w5 2712*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 2713*4757b351SPierre Pronchery ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#16 2714*4757b351SPierre Pronchery ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#16 2715*4757b351SPierre Pronchery ld4 {v12.s,v13.s,v14.s,v15.s}[2],[x0],#16 2716*4757b351SPierre Pronchery eor v0.16b,v0.16b,v12.16b 2717*4757b351SPierre Pronchery eor v1.16b,v1.16b,v13.16b 2718*4757b351SPierre Pronchery eor v2.16b,v2.16b,v14.16b 2719*4757b351SPierre Pronchery eor v3.16b,v3.16b,v15.16b 2720*4757b351SPierre Pronchery st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#16 2721*4757b351SPierre Pronchery st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#16 2722*4757b351SPierre Pronchery st4 {v0.s,v1.s,v2.s,v3.s}[2],[x1],#16 2723*4757b351SPierre Pronchery100: 2724*4757b351SPierre Pronchery ldp d10,d11,[sp,#16] 2725*4757b351SPierre Pronchery ldp d12,d13,[sp,#32] 2726*4757b351SPierre Pronchery ldp d14,d15,[sp,#48] 2727*4757b351SPierre Pronchery ldp x29,x30,[sp,#64] 2728*4757b351SPierre Pronchery ldp d8,d9,[sp],#80 2729*4757b351SPierre Pronchery AARCH64_VALIDATE_LINK_REGISTER 2730*4757b351SPierre Pronchery ret 2731*4757b351SPierre Pronchery.size vpsm4_ctr32_encrypt_blocks,.-vpsm4_ctr32_encrypt_blocks 
2732*4757b351SPierre Pronchery.globl vpsm4_xts_encrypt_gb 2733*4757b351SPierre Pronchery.type vpsm4_xts_encrypt_gb,%function 2734*4757b351SPierre Pronchery.align 5 2735*4757b351SPierre Proncheryvpsm4_xts_encrypt_gb: 2736*4757b351SPierre Pronchery AARCH64_SIGN_LINK_REGISTER 2737*4757b351SPierre Pronchery stp x15, x16, [sp, #-0x10]! 2738*4757b351SPierre Pronchery stp x17, x18, [sp, #-0x10]! 2739*4757b351SPierre Pronchery stp x19, x20, [sp, #-0x10]! 2740*4757b351SPierre Pronchery stp x21, x22, [sp, #-0x10]! 2741*4757b351SPierre Pronchery stp x23, x24, [sp, #-0x10]! 2742*4757b351SPierre Pronchery stp x25, x26, [sp, #-0x10]! 2743*4757b351SPierre Pronchery stp x27, x28, [sp, #-0x10]! 2744*4757b351SPierre Pronchery stp x29, x30, [sp, #-0x10]! 2745*4757b351SPierre Pronchery stp d8, d9, [sp, #-0x10]! 2746*4757b351SPierre Pronchery stp d10, d11, [sp, #-0x10]! 2747*4757b351SPierre Pronchery stp d12, d13, [sp, #-0x10]! 2748*4757b351SPierre Pronchery stp d14, d15, [sp, #-0x10]! 2749*4757b351SPierre Pronchery mov x26,x3 2750*4757b351SPierre Pronchery mov x27,x4 2751*4757b351SPierre Pronchery mov w28,w6 2752*4757b351SPierre Pronchery ld1 {v8.4s}, [x5] 2753*4757b351SPierre Pronchery mov x3,x27 2754*4757b351SPierre Pronchery adrp x10,.Lsbox 2755*4757b351SPierre Pronchery add x10,x10,#:lo12:.Lsbox 2756*4757b351SPierre Pronchery ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 2757*4757b351SPierre Pronchery ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 2758*4757b351SPierre Pronchery ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 2759*4757b351SPierre Pronchery ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] 2760*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2761*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 2762*4757b351SPierre Pronchery#endif 2763*4757b351SPierre Pronchery mov x10,x3 2764*4757b351SPierre Pronchery mov w11,#8 2765*4757b351SPierre Pronchery mov w12,v8.s[0] 2766*4757b351SPierre Pronchery mov w13,v8.s[1] 2767*4757b351SPierre Pronchery mov w14,v8.s[2] 2768*4757b351SPierre 
Pronchery mov w15,v8.s[3] 2769*4757b351SPierre Pronchery10: 2770*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2771*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 2772*4757b351SPierre Pronchery eor w6,w14,w15 2773*4757b351SPierre Pronchery eor w9,w7,w13 2774*4757b351SPierre Pronchery eor w6,w6,w9 2775*4757b351SPierre Pronchery movi v1.16b,#64 2776*4757b351SPierre Pronchery movi v2.16b,#128 2777*4757b351SPierre Pronchery movi v3.16b,#192 2778*4757b351SPierre Pronchery mov v0.s[0],w6 2779*4757b351SPierre Pronchery 2780*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2781*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2782*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2783*4757b351SPierre Pronchery 2784*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2785*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2786*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2787*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2788*4757b351SPierre Pronchery 2789*4757b351SPierre Pronchery mov w6,v0.s[0] 2790*4757b351SPierre Pronchery mov w7,v1.s[0] 2791*4757b351SPierre Pronchery mov w9,v2.s[0] 2792*4757b351SPierre Pronchery add w7,w6,w7 2793*4757b351SPierre Pronchery mov w6,v3.s[0] 2794*4757b351SPierre Pronchery add w7,w7,w9 2795*4757b351SPierre Pronchery add w7,w7,w6 2796*4757b351SPierre Pronchery 2797*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2798*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2799*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2800*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2801*4757b351SPierre Pronchery eor w12,w12,w6 2802*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 2803*4757b351SPierre Pronchery eor w6,w14,w15 2804*4757b351SPierre Pronchery eor w9,w12,w8 2805*4757b351SPierre Pronchery eor w6,w6,w9 2806*4757b351SPierre Pronchery movi v1.16b,#64 2807*4757b351SPierre Pronchery movi v2.16b,#128 
2808*4757b351SPierre Pronchery movi v3.16b,#192 2809*4757b351SPierre Pronchery mov v0.s[0],w6 2810*4757b351SPierre Pronchery 2811*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2812*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2813*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2814*4757b351SPierre Pronchery 2815*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2816*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2817*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2818*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2819*4757b351SPierre Pronchery 2820*4757b351SPierre Pronchery mov w6,v0.s[0] 2821*4757b351SPierre Pronchery mov w7,v1.s[0] 2822*4757b351SPierre Pronchery mov w9,v2.s[0] 2823*4757b351SPierre Pronchery add w7,w6,w7 2824*4757b351SPierre Pronchery mov w6,v3.s[0] 2825*4757b351SPierre Pronchery add w7,w7,w9 2826*4757b351SPierre Pronchery add w7,w7,w6 2827*4757b351SPierre Pronchery 2828*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2829*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2830*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2831*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2832*4757b351SPierre Pronchery ldp w7,w8,[x10],8 2833*4757b351SPierre Pronchery eor w13,w13,w6 2834*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 2835*4757b351SPierre Pronchery eor w6,w12,w13 2836*4757b351SPierre Pronchery eor w9,w7,w15 2837*4757b351SPierre Pronchery eor w6,w6,w9 2838*4757b351SPierre Pronchery movi v1.16b,#64 2839*4757b351SPierre Pronchery movi v2.16b,#128 2840*4757b351SPierre Pronchery movi v3.16b,#192 2841*4757b351SPierre Pronchery mov v0.s[0],w6 2842*4757b351SPierre Pronchery 2843*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2844*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2845*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2846*4757b351SPierre Pronchery 2847*4757b351SPierre Pronchery 
tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2848*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2849*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2850*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2851*4757b351SPierre Pronchery 2852*4757b351SPierre Pronchery mov w6,v0.s[0] 2853*4757b351SPierre Pronchery mov w7,v1.s[0] 2854*4757b351SPierre Pronchery mov w9,v2.s[0] 2855*4757b351SPierre Pronchery add w7,w6,w7 2856*4757b351SPierre Pronchery mov w6,v3.s[0] 2857*4757b351SPierre Pronchery add w7,w7,w9 2858*4757b351SPierre Pronchery add w7,w7,w6 2859*4757b351SPierre Pronchery 2860*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2861*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2862*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2863*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2864*4757b351SPierre Pronchery eor w14,w14,w6 2865*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 2866*4757b351SPierre Pronchery eor w6,w12,w13 2867*4757b351SPierre Pronchery eor w9,w14,w8 2868*4757b351SPierre Pronchery eor w6,w6,w9 2869*4757b351SPierre Pronchery movi v1.16b,#64 2870*4757b351SPierre Pronchery movi v2.16b,#128 2871*4757b351SPierre Pronchery movi v3.16b,#192 2872*4757b351SPierre Pronchery mov v0.s[0],w6 2873*4757b351SPierre Pronchery 2874*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 2875*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 2876*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 2877*4757b351SPierre Pronchery 2878*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 2879*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 2880*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 2881*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 2882*4757b351SPierre Pronchery 2883*4757b351SPierre Pronchery mov w6,v0.s[0] 2884*4757b351SPierre 
Pronchery mov w7,v1.s[0] 2885*4757b351SPierre Pronchery mov w9,v2.s[0] 2886*4757b351SPierre Pronchery add w7,w6,w7 2887*4757b351SPierre Pronchery mov w6,v3.s[0] 2888*4757b351SPierre Pronchery add w7,w7,w9 2889*4757b351SPierre Pronchery add w7,w7,w6 2890*4757b351SPierre Pronchery 2891*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 2892*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 2893*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 2894*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 2895*4757b351SPierre Pronchery eor w15,w15,w6 2896*4757b351SPierre Pronchery subs w11,w11,#1 2897*4757b351SPierre Pronchery b.ne 10b 2898*4757b351SPierre Pronchery mov v8.s[0],w15 2899*4757b351SPierre Pronchery mov v8.s[1],w14 2900*4757b351SPierre Pronchery mov v8.s[2],w13 2901*4757b351SPierre Pronchery mov v8.s[3],w12 2902*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 2903*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 2904*4757b351SPierre Pronchery#endif 2905*4757b351SPierre Pronchery mov x3,x26 2906*4757b351SPierre Pronchery and x29,x2,#0x0F 2907*4757b351SPierre Pronchery // convert length into blocks 2908*4757b351SPierre Pronchery lsr x2,x2,4 2909*4757b351SPierre Pronchery cmp x2,#1 2910*4757b351SPierre Pronchery b.lt .return_gb 2911*4757b351SPierre Pronchery 2912*4757b351SPierre Pronchery cmp x29,0 2913*4757b351SPierre Pronchery // If the encryption/decryption Length is N times of 16, 2914*4757b351SPierre Pronchery // the all blocks are encrypted/decrypted in .xts_encrypt_blocks_gb 2915*4757b351SPierre Pronchery b.eq .xts_encrypt_blocks_gb 2916*4757b351SPierre Pronchery 2917*4757b351SPierre Pronchery // If the encryption/decryption length is not N times of 16, 2918*4757b351SPierre Pronchery // the last two blocks are encrypted/decrypted in .last_2blks_tweak_gb or .only_2blks_tweak_gb 2919*4757b351SPierre Pronchery // the other blocks are encrypted/decrypted in .xts_encrypt_blocks_gb 2920*4757b351SPierre Pronchery subs x2,x2,#1 2921*4757b351SPierre Pronchery b.eq 
.only_2blks_tweak_gb 2922*4757b351SPierre Pronchery.xts_encrypt_blocks_gb: 2923*4757b351SPierre Pronchery rbit v8.16b,v8.16b 2924*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2925*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 2926*4757b351SPierre Pronchery#endif 2927*4757b351SPierre Pronchery mov x12,v8.d[0] 2928*4757b351SPierre Pronchery mov x13,v8.d[1] 2929*4757b351SPierre Pronchery mov w7,0x87 2930*4757b351SPierre Pronchery extr x9,x13,x13,#32 2931*4757b351SPierre Pronchery extr x15,x13,x12,#63 2932*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2933*4757b351SPierre Pronchery eor x14,x8,x12,lsl#1 2934*4757b351SPierre Pronchery mov w7,0x87 2935*4757b351SPierre Pronchery extr x9,x15,x15,#32 2936*4757b351SPierre Pronchery extr x17,x15,x14,#63 2937*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2938*4757b351SPierre Pronchery eor x16,x8,x14,lsl#1 2939*4757b351SPierre Pronchery mov w7,0x87 2940*4757b351SPierre Pronchery extr x9,x17,x17,#32 2941*4757b351SPierre Pronchery extr x19,x17,x16,#63 2942*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2943*4757b351SPierre Pronchery eor x18,x8,x16,lsl#1 2944*4757b351SPierre Pronchery mov w7,0x87 2945*4757b351SPierre Pronchery extr x9,x19,x19,#32 2946*4757b351SPierre Pronchery extr x21,x19,x18,#63 2947*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2948*4757b351SPierre Pronchery eor x20,x8,x18,lsl#1 2949*4757b351SPierre Pronchery mov w7,0x87 2950*4757b351SPierre Pronchery extr x9,x21,x21,#32 2951*4757b351SPierre Pronchery extr x23,x21,x20,#63 2952*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2953*4757b351SPierre Pronchery eor x22,x8,x20,lsl#1 2954*4757b351SPierre Pronchery mov w7,0x87 2955*4757b351SPierre Pronchery extr x9,x23,x23,#32 2956*4757b351SPierre Pronchery extr x25,x23,x22,#63 2957*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2958*4757b351SPierre Pronchery eor x24,x8,x22,lsl#1 2959*4757b351SPierre Pronchery mov w7,0x87 2960*4757b351SPierre Pronchery extr x9,x25,x25,#32 2961*4757b351SPierre Pronchery extr x27,x25,x24,#63 
2962*4757b351SPierre Pronchery and w8,w7,w9,asr#31 2963*4757b351SPierre Pronchery eor x26,x8,x24,lsl#1 2964*4757b351SPierre Pronchery.Lxts_8_blocks_process_gb: 2965*4757b351SPierre Pronchery cmp x2,#8 2966*4757b351SPierre Pronchery b.lt .Lxts_4_blocks_process_gb 2967*4757b351SPierre Pronchery mov v0.d[0],x12 2968*4757b351SPierre Pronchery mov v0.d[1],x13 2969*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2970*4757b351SPierre Pronchery rev32 v0.16b,v0.16b 2971*4757b351SPierre Pronchery#endif 2972*4757b351SPierre Pronchery mov v1.d[0],x14 2973*4757b351SPierre Pronchery mov v1.d[1],x15 2974*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2975*4757b351SPierre Pronchery rev32 v1.16b,v1.16b 2976*4757b351SPierre Pronchery#endif 2977*4757b351SPierre Pronchery mov v2.d[0],x16 2978*4757b351SPierre Pronchery mov v2.d[1],x17 2979*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2980*4757b351SPierre Pronchery rev32 v2.16b,v2.16b 2981*4757b351SPierre Pronchery#endif 2982*4757b351SPierre Pronchery mov v3.d[0],x18 2983*4757b351SPierre Pronchery mov v3.d[1],x19 2984*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2985*4757b351SPierre Pronchery rev32 v3.16b,v3.16b 2986*4757b351SPierre Pronchery#endif 2987*4757b351SPierre Pronchery mov v12.d[0],x20 2988*4757b351SPierre Pronchery mov v12.d[1],x21 2989*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2990*4757b351SPierre Pronchery rev32 v12.16b,v12.16b 2991*4757b351SPierre Pronchery#endif 2992*4757b351SPierre Pronchery mov v13.d[0],x22 2993*4757b351SPierre Pronchery mov v13.d[1],x23 2994*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2995*4757b351SPierre Pronchery rev32 v13.16b,v13.16b 2996*4757b351SPierre Pronchery#endif 2997*4757b351SPierre Pronchery mov v14.d[0],x24 2998*4757b351SPierre Pronchery mov v14.d[1],x25 2999*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3000*4757b351SPierre Pronchery rev32 v14.16b,v14.16b 3001*4757b351SPierre Pronchery#endif 3002*4757b351SPierre Pronchery mov v15.d[0],x26 3003*4757b351SPierre Pronchery mov 
v15.d[1],x27 3004*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3005*4757b351SPierre Pronchery rev32 v15.16b,v15.16b 3006*4757b351SPierre Pronchery#endif 3007*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 3008*4757b351SPierre Pronchery rbit v0.16b,v0.16b 3009*4757b351SPierre Pronchery rbit v1.16b,v1.16b 3010*4757b351SPierre Pronchery rbit v2.16b,v2.16b 3011*4757b351SPierre Pronchery rbit v3.16b,v3.16b 3012*4757b351SPierre Pronchery eor v4.16b, v4.16b, v0.16b 3013*4757b351SPierre Pronchery eor v5.16b, v5.16b, v1.16b 3014*4757b351SPierre Pronchery eor v6.16b, v6.16b, v2.16b 3015*4757b351SPierre Pronchery eor v7.16b, v7.16b, v3.16b 3016*4757b351SPierre Pronchery ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 3017*4757b351SPierre Pronchery rbit v12.16b,v12.16b 3018*4757b351SPierre Pronchery rbit v13.16b,v13.16b 3019*4757b351SPierre Pronchery rbit v14.16b,v14.16b 3020*4757b351SPierre Pronchery rbit v15.16b,v15.16b 3021*4757b351SPierre Pronchery eor v8.16b, v8.16b, v12.16b 3022*4757b351SPierre Pronchery eor v9.16b, v9.16b, v13.16b 3023*4757b351SPierre Pronchery eor v10.16b, v10.16b, v14.16b 3024*4757b351SPierre Pronchery eor v11.16b, v11.16b, v15.16b 3025*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3026*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3027*4757b351SPierre Pronchery#endif 3028*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3029*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 3030*4757b351SPierre Pronchery#endif 3031*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3032*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 3033*4757b351SPierre Pronchery#endif 3034*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3035*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 3036*4757b351SPierre Pronchery#endif 3037*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3038*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 3039*4757b351SPierre Pronchery#endif 3040*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3041*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 3042*4757b351SPierre 
Pronchery#endif 3043*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3044*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 3045*4757b351SPierre Pronchery#endif 3046*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3047*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 3048*4757b351SPierre Pronchery#endif 3049*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 3050*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 3051*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 3052*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 3053*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 3054*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 3055*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 3056*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 3057*4757b351SPierre Pronchery zip1 v0.4s,v8.4s,v9.4s 3058*4757b351SPierre Pronchery zip2 v1.4s,v8.4s,v9.4s 3059*4757b351SPierre Pronchery zip1 v2.4s,v10.4s,v11.4s 3060*4757b351SPierre Pronchery zip2 v3.4s,v10.4s,v11.4s 3061*4757b351SPierre Pronchery zip1 v8.2d,v0.2d,v2.2d 3062*4757b351SPierre Pronchery zip2 v9.2d,v0.2d,v2.2d 3063*4757b351SPierre Pronchery zip1 v10.2d,v1.2d,v3.2d 3064*4757b351SPierre Pronchery zip2 v11.2d,v1.2d,v3.2d 3065*4757b351SPierre Pronchery bl _vpsm4_enc_8blks 3066*4757b351SPierre Pronchery zip1 v8.4s,v0.4s,v1.4s 3067*4757b351SPierre Pronchery zip2 v9.4s,v0.4s,v1.4s 3068*4757b351SPierre Pronchery zip1 v10.4s,v2.4s,v3.4s 3069*4757b351SPierre Pronchery zip2 v11.4s,v2.4s,v3.4s 3070*4757b351SPierre Pronchery zip1 v0.2d,v8.2d,v10.2d 3071*4757b351SPierre Pronchery zip2 v1.2d,v8.2d,v10.2d 3072*4757b351SPierre Pronchery zip1 v2.2d,v9.2d,v11.2d 3073*4757b351SPierre Pronchery zip2 v3.2d,v9.2d,v11.2d 3074*4757b351SPierre Pronchery zip1 v8.4s,v4.4s,v5.4s 3075*4757b351SPierre Pronchery zip2 v9.4s,v4.4s,v5.4s 3076*4757b351SPierre Pronchery zip1 v10.4s,v6.4s,v7.4s 3077*4757b351SPierre Pronchery zip2 v11.4s,v6.4s,v7.4s 3078*4757b351SPierre Pronchery zip1 v4.2d,v8.2d,v10.2d 3079*4757b351SPierre Pronchery zip2 v5.2d,v8.2d,v10.2d 
3080*4757b351SPierre Pronchery zip1 v6.2d,v9.2d,v11.2d 3081*4757b351SPierre Pronchery zip2 v7.2d,v9.2d,v11.2d 3082*4757b351SPierre Pronchery mov v12.d[0],x12 3083*4757b351SPierre Pronchery mov v12.d[1],x13 3084*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3085*4757b351SPierre Pronchery rev32 v12.16b,v12.16b 3086*4757b351SPierre Pronchery#endif 3087*4757b351SPierre Pronchery mov w7,0x87 3088*4757b351SPierre Pronchery extr x9,x27,x27,#32 3089*4757b351SPierre Pronchery extr x13,x27,x26,#63 3090*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3091*4757b351SPierre Pronchery eor x12,x8,x26,lsl#1 3092*4757b351SPierre Pronchery mov v13.d[0],x14 3093*4757b351SPierre Pronchery mov v13.d[1],x15 3094*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3095*4757b351SPierre Pronchery rev32 v13.16b,v13.16b 3096*4757b351SPierre Pronchery#endif 3097*4757b351SPierre Pronchery mov w7,0x87 3098*4757b351SPierre Pronchery extr x9,x13,x13,#32 3099*4757b351SPierre Pronchery extr x15,x13,x12,#63 3100*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3101*4757b351SPierre Pronchery eor x14,x8,x12,lsl#1 3102*4757b351SPierre Pronchery mov v14.d[0],x16 3103*4757b351SPierre Pronchery mov v14.d[1],x17 3104*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3105*4757b351SPierre Pronchery rev32 v14.16b,v14.16b 3106*4757b351SPierre Pronchery#endif 3107*4757b351SPierre Pronchery mov w7,0x87 3108*4757b351SPierre Pronchery extr x9,x15,x15,#32 3109*4757b351SPierre Pronchery extr x17,x15,x14,#63 3110*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3111*4757b351SPierre Pronchery eor x16,x8,x14,lsl#1 3112*4757b351SPierre Pronchery mov v15.d[0],x18 3113*4757b351SPierre Pronchery mov v15.d[1],x19 3114*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3115*4757b351SPierre Pronchery rev32 v15.16b,v15.16b 3116*4757b351SPierre Pronchery#endif 3117*4757b351SPierre Pronchery mov w7,0x87 3118*4757b351SPierre Pronchery extr x9,x17,x17,#32 3119*4757b351SPierre Pronchery extr x19,x17,x16,#63 3120*4757b351SPierre Pronchery and 
w8,w7,w9,asr#31 3121*4757b351SPierre Pronchery eor x18,x8,x16,lsl#1 3122*4757b351SPierre Pronchery mov v8.d[0],x20 3123*4757b351SPierre Pronchery mov v8.d[1],x21 3124*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3125*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 3126*4757b351SPierre Pronchery#endif 3127*4757b351SPierre Pronchery mov w7,0x87 3128*4757b351SPierre Pronchery extr x9,x19,x19,#32 3129*4757b351SPierre Pronchery extr x21,x19,x18,#63 3130*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3131*4757b351SPierre Pronchery eor x20,x8,x18,lsl#1 3132*4757b351SPierre Pronchery mov v9.d[0],x22 3133*4757b351SPierre Pronchery mov v9.d[1],x23 3134*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3135*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 3136*4757b351SPierre Pronchery#endif 3137*4757b351SPierre Pronchery mov w7,0x87 3138*4757b351SPierre Pronchery extr x9,x21,x21,#32 3139*4757b351SPierre Pronchery extr x23,x21,x20,#63 3140*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3141*4757b351SPierre Pronchery eor x22,x8,x20,lsl#1 3142*4757b351SPierre Pronchery mov v10.d[0],x24 3143*4757b351SPierre Pronchery mov v10.d[1],x25 3144*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3145*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 3146*4757b351SPierre Pronchery#endif 3147*4757b351SPierre Pronchery mov w7,0x87 3148*4757b351SPierre Pronchery extr x9,x23,x23,#32 3149*4757b351SPierre Pronchery extr x25,x23,x22,#63 3150*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3151*4757b351SPierre Pronchery eor x24,x8,x22,lsl#1 3152*4757b351SPierre Pronchery mov v11.d[0],x26 3153*4757b351SPierre Pronchery mov v11.d[1],x27 3154*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3155*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 3156*4757b351SPierre Pronchery#endif 3157*4757b351SPierre Pronchery mov w7,0x87 3158*4757b351SPierre Pronchery extr x9,x25,x25,#32 3159*4757b351SPierre Pronchery extr x27,x25,x24,#63 3160*4757b351SPierre Pronchery and w8,w7,w9,asr#31 3161*4757b351SPierre Pronchery eor 
x26,x8,x24,lsl#1 3162*4757b351SPierre Pronchery eor v0.16b, v0.16b, v12.16b 3163*4757b351SPierre Pronchery eor v1.16b, v1.16b, v13.16b 3164*4757b351SPierre Pronchery eor v2.16b, v2.16b, v14.16b 3165*4757b351SPierre Pronchery eor v3.16b, v3.16b, v15.16b 3166*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 3167*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 3168*4757b351SPierre Pronchery eor v6.16b, v6.16b, v10.16b 3169*4757b351SPierre Pronchery eor v7.16b, v7.16b, v11.16b 3170*4757b351SPierre Pronchery 3171*4757b351SPierre Pronchery // save the last tweak 3172*4757b351SPierre Pronchery st1 {v11.4s},[x5] 3173*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 3174*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 3175*4757b351SPierre Pronchery subs x2,x2,#8 3176*4757b351SPierre Pronchery b.gt .Lxts_8_blocks_process_gb 3177*4757b351SPierre Pronchery b 100f 3178*4757b351SPierre Pronchery.Lxts_4_blocks_process_gb: 3179*4757b351SPierre Pronchery mov v8.d[0],x12 3180*4757b351SPierre Pronchery mov v8.d[1],x13 3181*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3182*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 3183*4757b351SPierre Pronchery#endif 3184*4757b351SPierre Pronchery mov v9.d[0],x14 3185*4757b351SPierre Pronchery mov v9.d[1],x15 3186*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3187*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 3188*4757b351SPierre Pronchery#endif 3189*4757b351SPierre Pronchery mov v10.d[0],x16 3190*4757b351SPierre Pronchery mov v10.d[1],x17 3191*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3192*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 3193*4757b351SPierre Pronchery#endif 3194*4757b351SPierre Pronchery mov v11.d[0],x18 3195*4757b351SPierre Pronchery mov v11.d[1],x19 3196*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3197*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 3198*4757b351SPierre Pronchery#endif 3199*4757b351SPierre Pronchery cmp x2,#4 3200*4757b351SPierre Pronchery b.lt 1f 
3201*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 3202*4757b351SPierre Pronchery rbit v8.16b,v8.16b 3203*4757b351SPierre Pronchery rbit v9.16b,v9.16b 3204*4757b351SPierre Pronchery rbit v10.16b,v10.16b 3205*4757b351SPierre Pronchery rbit v11.16b,v11.16b 3206*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 3207*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 3208*4757b351SPierre Pronchery eor v6.16b, v6.16b, v10.16b 3209*4757b351SPierre Pronchery eor v7.16b, v7.16b, v11.16b 3210*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3211*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3212*4757b351SPierre Pronchery#endif 3213*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3214*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 3215*4757b351SPierre Pronchery#endif 3216*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3217*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 3218*4757b351SPierre Pronchery#endif 3219*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3220*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 3221*4757b351SPierre Pronchery#endif 3222*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 3223*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 3224*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 3225*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 3226*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 3227*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 3228*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 3229*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 3230*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 3231*4757b351SPierre Pronchery zip1 v4.4s,v0.4s,v1.4s 3232*4757b351SPierre Pronchery zip2 v5.4s,v0.4s,v1.4s 3233*4757b351SPierre Pronchery zip1 v6.4s,v2.4s,v3.4s 3234*4757b351SPierre Pronchery zip2 v7.4s,v2.4s,v3.4s 3235*4757b351SPierre Pronchery zip1 v0.2d,v4.2d,v6.2d 3236*4757b351SPierre Pronchery zip2 v1.2d,v4.2d,v6.2d 3237*4757b351SPierre Pronchery zip1 v2.2d,v5.2d,v7.2d 3238*4757b351SPierre Pronchery zip2 v3.2d,v5.2d,v7.2d 
3239*4757b351SPierre Pronchery eor v0.16b, v0.16b, v8.16b 3240*4757b351SPierre Pronchery eor v1.16b, v1.16b, v9.16b 3241*4757b351SPierre Pronchery eor v2.16b, v2.16b, v10.16b 3242*4757b351SPierre Pronchery eor v3.16b, v3.16b, v11.16b 3243*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 3244*4757b351SPierre Pronchery sub x2,x2,#4 3245*4757b351SPierre Pronchery mov v8.d[0],x20 3246*4757b351SPierre Pronchery mov v8.d[1],x21 3247*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3248*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 3249*4757b351SPierre Pronchery#endif 3250*4757b351SPierre Pronchery mov v9.d[0],x22 3251*4757b351SPierre Pronchery mov v9.d[1],x23 3252*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3253*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 3254*4757b351SPierre Pronchery#endif 3255*4757b351SPierre Pronchery mov v10.d[0],x24 3256*4757b351SPierre Pronchery mov v10.d[1],x25 3257*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3258*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 3259*4757b351SPierre Pronchery#endif 3260*4757b351SPierre Pronchery // save the last tweak 3261*4757b351SPierre Pronchery st1 {v11.4s},[x5] 3262*4757b351SPierre Pronchery1: 3263*4757b351SPierre Pronchery // process last block 3264*4757b351SPierre Pronchery cmp x2,#1 3265*4757b351SPierre Pronchery b.lt 100f 3266*4757b351SPierre Pronchery b.gt 1f 3267*4757b351SPierre Pronchery ld1 {v4.4s},[x0],#16 3268*4757b351SPierre Pronchery rbit v8.16b,v8.16b 3269*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 3270*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3271*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3272*4757b351SPierre Pronchery#endif 3273*4757b351SPierre Pronchery mov x10,x3 3274*4757b351SPierre Pronchery mov w11,#8 3275*4757b351SPierre Pronchery mov w12,v4.s[0] 3276*4757b351SPierre Pronchery mov w13,v4.s[1] 3277*4757b351SPierre Pronchery mov w14,v4.s[2] 3278*4757b351SPierre Pronchery mov w15,v4.s[3] 3279*4757b351SPierre Pronchery10: 3280*4757b351SPierre Pronchery 
ldp w7,w8,[x10],8 3281*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 3282*4757b351SPierre Pronchery eor w6,w14,w15 3283*4757b351SPierre Pronchery eor w9,w7,w13 3284*4757b351SPierre Pronchery eor w6,w6,w9 3285*4757b351SPierre Pronchery movi v1.16b,#64 3286*4757b351SPierre Pronchery movi v2.16b,#128 3287*4757b351SPierre Pronchery movi v3.16b,#192 3288*4757b351SPierre Pronchery mov v0.s[0],w6 3289*4757b351SPierre Pronchery 3290*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3291*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3292*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3293*4757b351SPierre Pronchery 3294*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3295*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3296*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3297*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3298*4757b351SPierre Pronchery 3299*4757b351SPierre Pronchery mov w6,v0.s[0] 3300*4757b351SPierre Pronchery mov w7,v1.s[0] 3301*4757b351SPierre Pronchery mov w9,v2.s[0] 3302*4757b351SPierre Pronchery add w7,w6,w7 3303*4757b351SPierre Pronchery mov w6,v3.s[0] 3304*4757b351SPierre Pronchery add w7,w7,w9 3305*4757b351SPierre Pronchery add w7,w7,w6 3306*4757b351SPierre Pronchery 3307*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3308*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3309*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3310*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3311*4757b351SPierre Pronchery eor w12,w12,w6 3312*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 3313*4757b351SPierre Pronchery eor w6,w14,w15 3314*4757b351SPierre Pronchery eor w9,w12,w8 3315*4757b351SPierre Pronchery eor w6,w6,w9 3316*4757b351SPierre Pronchery movi v1.16b,#64 3317*4757b351SPierre Pronchery movi v2.16b,#128 3318*4757b351SPierre Pronchery movi v3.16b,#192 3319*4757b351SPierre Pronchery mov 
v0.s[0],w6 3320*4757b351SPierre Pronchery 3321*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3322*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3323*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3324*4757b351SPierre Pronchery 3325*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3326*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3327*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3328*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3329*4757b351SPierre Pronchery 3330*4757b351SPierre Pronchery mov w6,v0.s[0] 3331*4757b351SPierre Pronchery mov w7,v1.s[0] 3332*4757b351SPierre Pronchery mov w9,v2.s[0] 3333*4757b351SPierre Pronchery add w7,w6,w7 3334*4757b351SPierre Pronchery mov w6,v3.s[0] 3335*4757b351SPierre Pronchery add w7,w7,w9 3336*4757b351SPierre Pronchery add w7,w7,w6 3337*4757b351SPierre Pronchery 3338*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3339*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3340*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3341*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3342*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3343*4757b351SPierre Pronchery eor w13,w13,w6 3344*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 3345*4757b351SPierre Pronchery eor w6,w12,w13 3346*4757b351SPierre Pronchery eor w9,w7,w15 3347*4757b351SPierre Pronchery eor w6,w6,w9 3348*4757b351SPierre Pronchery movi v1.16b,#64 3349*4757b351SPierre Pronchery movi v2.16b,#128 3350*4757b351SPierre Pronchery movi v3.16b,#192 3351*4757b351SPierre Pronchery mov v0.s[0],w6 3352*4757b351SPierre Pronchery 3353*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3354*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3355*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3356*4757b351SPierre Pronchery 3357*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3358*4757b351SPierre Pronchery 
tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3359*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3360*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3361*4757b351SPierre Pronchery 3362*4757b351SPierre Pronchery mov w6,v0.s[0] 3363*4757b351SPierre Pronchery mov w7,v1.s[0] 3364*4757b351SPierre Pronchery mov w9,v2.s[0] 3365*4757b351SPierre Pronchery add w7,w6,w7 3366*4757b351SPierre Pronchery mov w6,v3.s[0] 3367*4757b351SPierre Pronchery add w7,w7,w9 3368*4757b351SPierre Pronchery add w7,w7,w6 3369*4757b351SPierre Pronchery 3370*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3371*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3372*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3373*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3374*4757b351SPierre Pronchery eor w14,w14,w6 3375*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 3376*4757b351SPierre Pronchery eor w6,w12,w13 3377*4757b351SPierre Pronchery eor w9,w14,w8 3378*4757b351SPierre Pronchery eor w6,w6,w9 3379*4757b351SPierre Pronchery movi v1.16b,#64 3380*4757b351SPierre Pronchery movi v2.16b,#128 3381*4757b351SPierre Pronchery movi v3.16b,#192 3382*4757b351SPierre Pronchery mov v0.s[0],w6 3383*4757b351SPierre Pronchery 3384*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3385*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3386*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3387*4757b351SPierre Pronchery 3388*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3389*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3390*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3391*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3392*4757b351SPierre Pronchery 3393*4757b351SPierre Pronchery mov w6,v0.s[0] 3394*4757b351SPierre Pronchery mov w7,v1.s[0] 3395*4757b351SPierre Pronchery mov w9,v2.s[0] 
3396*4757b351SPierre Pronchery add w7,w6,w7 3397*4757b351SPierre Pronchery mov w6,v3.s[0] 3398*4757b351SPierre Pronchery add w7,w7,w9 3399*4757b351SPierre Pronchery add w7,w7,w6 3400*4757b351SPierre Pronchery 3401*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3402*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3403*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3404*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3405*4757b351SPierre Pronchery eor w15,w15,w6 3406*4757b351SPierre Pronchery subs w11,w11,#1 3407*4757b351SPierre Pronchery b.ne 10b 3408*4757b351SPierre Pronchery mov v4.s[0],w15 3409*4757b351SPierre Pronchery mov v4.s[1],w14 3410*4757b351SPierre Pronchery mov v4.s[2],w13 3411*4757b351SPierre Pronchery mov v4.s[3],w12 3412*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3413*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3414*4757b351SPierre Pronchery#endif 3415*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 3416*4757b351SPierre Pronchery st1 {v4.4s},[x1],#16 3417*4757b351SPierre Pronchery // save the last tweak 3418*4757b351SPierre Pronchery st1 {v8.4s},[x5] 3419*4757b351SPierre Pronchery b 100f 3420*4757b351SPierre Pronchery1: // process last 2 blocks 3421*4757b351SPierre Pronchery cmp x2,#2 3422*4757b351SPierre Pronchery b.gt 1f 3423*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s},[x0],#32 3424*4757b351SPierre Pronchery rbit v8.16b,v8.16b 3425*4757b351SPierre Pronchery rbit v9.16b,v9.16b 3426*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 3427*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 3428*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3429*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3430*4757b351SPierre Pronchery#endif 3431*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3432*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 3433*4757b351SPierre Pronchery#endif 3434*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 3435*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 3436*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 
3437*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 3438*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 3439*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 3440*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 3441*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 3442*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 3443*4757b351SPierre Pronchery zip1 v4.4s,v0.4s,v1.4s 3444*4757b351SPierre Pronchery zip2 v5.4s,v0.4s,v1.4s 3445*4757b351SPierre Pronchery zip1 v6.4s,v2.4s,v3.4s 3446*4757b351SPierre Pronchery zip2 v7.4s,v2.4s,v3.4s 3447*4757b351SPierre Pronchery zip1 v0.2d,v4.2d,v6.2d 3448*4757b351SPierre Pronchery zip2 v1.2d,v4.2d,v6.2d 3449*4757b351SPierre Pronchery zip1 v2.2d,v5.2d,v7.2d 3450*4757b351SPierre Pronchery zip2 v3.2d,v5.2d,v7.2d 3451*4757b351SPierre Pronchery eor v0.16b, v0.16b, v8.16b 3452*4757b351SPierre Pronchery eor v1.16b, v1.16b, v9.16b 3453*4757b351SPierre Pronchery st1 {v0.4s,v1.4s},[x1],#32 3454*4757b351SPierre Pronchery // save the last tweak 3455*4757b351SPierre Pronchery st1 {v9.4s},[x5] 3456*4757b351SPierre Pronchery b 100f 3457*4757b351SPierre Pronchery1: // process last 3 blocks 3458*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 3459*4757b351SPierre Pronchery rbit v8.16b,v8.16b 3460*4757b351SPierre Pronchery rbit v9.16b,v9.16b 3461*4757b351SPierre Pronchery rbit v10.16b,v10.16b 3462*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 3463*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 3464*4757b351SPierre Pronchery eor v6.16b, v6.16b, v10.16b 3465*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3466*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3467*4757b351SPierre Pronchery#endif 3468*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3469*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 3470*4757b351SPierre Pronchery#endif 3471*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3472*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 3473*4757b351SPierre Pronchery#endif 3474*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 
3475*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 3476*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 3477*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 3478*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 3479*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 3480*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 3481*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 3482*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 3483*4757b351SPierre Pronchery zip1 v4.4s,v0.4s,v1.4s 3484*4757b351SPierre Pronchery zip2 v5.4s,v0.4s,v1.4s 3485*4757b351SPierre Pronchery zip1 v6.4s,v2.4s,v3.4s 3486*4757b351SPierre Pronchery zip2 v7.4s,v2.4s,v3.4s 3487*4757b351SPierre Pronchery zip1 v0.2d,v4.2d,v6.2d 3488*4757b351SPierre Pronchery zip2 v1.2d,v4.2d,v6.2d 3489*4757b351SPierre Pronchery zip1 v2.2d,v5.2d,v7.2d 3490*4757b351SPierre Pronchery zip2 v3.2d,v5.2d,v7.2d 3491*4757b351SPierre Pronchery eor v0.16b, v0.16b, v8.16b 3492*4757b351SPierre Pronchery eor v1.16b, v1.16b, v9.16b 3493*4757b351SPierre Pronchery eor v2.16b, v2.16b, v10.16b 3494*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s},[x1],#48 3495*4757b351SPierre Pronchery // save the last tweak 3496*4757b351SPierre Pronchery st1 {v10.4s},[x5] 3497*4757b351SPierre Pronchery100: 3498*4757b351SPierre Pronchery cmp x29,0 3499*4757b351SPierre Pronchery b.eq .return_gb 3500*4757b351SPierre Pronchery 3501*4757b351SPierre Pronchery// This branch calculates the last two tweaks, 3502*4757b351SPierre Pronchery// while the encryption/decryption length is larger than 32 3503*4757b351SPierre Pronchery.last_2blks_tweak_gb: 3504*4757b351SPierre Pronchery ld1 {v8.4s},[x5] 3505*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3506*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 3507*4757b351SPierre Pronchery#endif 3508*4757b351SPierre Pronchery rbit v2.16b,v8.16b 3509*4757b351SPierre Pronchery adrp x10,.Lxts_magic 3510*4757b351SPierre Pronchery ldr q0, [x10, #:lo12:.Lxts_magic] 3511*4757b351SPierre Pronchery shl v9.16b, v2.16b, #1 
3512*4757b351SPierre Pronchery ext v1.16b, v2.16b, v2.16b,#15 3513*4757b351SPierre Pronchery ushr v1.16b, v1.16b, #7 3514*4757b351SPierre Pronchery mul v1.16b, v1.16b, v0.16b 3515*4757b351SPierre Pronchery eor v9.16b, v9.16b, v1.16b 3516*4757b351SPierre Pronchery rbit v9.16b,v9.16b 3517*4757b351SPierre Pronchery rbit v2.16b,v9.16b 3518*4757b351SPierre Pronchery adrp x10,.Lxts_magic 3519*4757b351SPierre Pronchery ldr q0, [x10, #:lo12:.Lxts_magic] 3520*4757b351SPierre Pronchery shl v10.16b, v2.16b, #1 3521*4757b351SPierre Pronchery ext v1.16b, v2.16b, v2.16b,#15 3522*4757b351SPierre Pronchery ushr v1.16b, v1.16b, #7 3523*4757b351SPierre Pronchery mul v1.16b, v1.16b, v0.16b 3524*4757b351SPierre Pronchery eor v10.16b, v10.16b, v1.16b 3525*4757b351SPierre Pronchery rbit v10.16b,v10.16b 3526*4757b351SPierre Pronchery b .check_dec_gb 3527*4757b351SPierre Pronchery 3528*4757b351SPierre Pronchery 3529*4757b351SPierre Pronchery// This branch calculates the last two tweaks 3530*4757b351SPierre Pronchery// when the encryption/decryption length is equal to 32, which needs only two tweaks: v9 is a copy of v8 and v10 is derived from v9 using .Lxts_magic 3531*4757b351SPierre Pronchery.only_2blks_tweak_gb: 3532*4757b351SPierre Pronchery mov v9.16b,v8.16b 3533*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3534*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 3535*4757b351SPierre Pronchery#endif 3536*4757b351SPierre Pronchery rbit v2.16b,v9.16b 3537*4757b351SPierre Pronchery adrp x10,.Lxts_magic 3538*4757b351SPierre Pronchery ldr q0, [x10, #:lo12:.Lxts_magic] 3539*4757b351SPierre Pronchery shl v10.16b, v2.16b, #1 3540*4757b351SPierre Pronchery ext v1.16b, v2.16b, v2.16b,#15 3541*4757b351SPierre Pronchery ushr v1.16b, v1.16b, #7 3542*4757b351SPierre Pronchery mul v1.16b, v1.16b, v0.16b 3543*4757b351SPierre Pronchery eor v10.16b, v10.16b, v1.16b 3544*4757b351SPierre Pronchery rbit v10.16b,v10.16b 3545*4757b351SPierre Pronchery b .check_dec_gb 3546*4757b351SPierre Pronchery 3547*4757b351SPierre Pronchery 3548*4757b351SPierre Pronchery// Determine whether 
encryption or decryption is required. 3549*4757b351SPierre Pronchery// The last two tweaks need to be swapped for decryption. 3550*4757b351SPierre Pronchery.check_dec_gb: 3551*4757b351SPierre Pronchery // encryption:1 decryption:0 3552*4757b351SPierre Pronchery cmp w28,1 3553*4757b351SPierre Pronchery b.eq .process_last_2blks_gb 3554*4757b351SPierre Pronchery mov v0.16B,v9.16b 3555*4757b351SPierre Pronchery mov v9.16B,v10.16b 3556*4757b351SPierre Pronchery mov v10.16B,v0.16b 3557*4757b351SPierre Pronchery 3558*4757b351SPierre Pronchery.process_last_2blks_gb: 3559*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3560*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 3561*4757b351SPierre Pronchery#endif 3562*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3563*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 3564*4757b351SPierre Pronchery#endif 3565*4757b351SPierre Pronchery ld1 {v4.4s},[x0],#16 3566*4757b351SPierre Pronchery eor v4.16b, v4.16b, v9.16b 3567*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3568*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3569*4757b351SPierre Pronchery#endif 3570*4757b351SPierre Pronchery mov x10,x3 3571*4757b351SPierre Pronchery mov w11,#8 3572*4757b351SPierre Pronchery mov w12,v4.s[0] 3573*4757b351SPierre Pronchery mov w13,v4.s[1] 3574*4757b351SPierre Pronchery mov w14,v4.s[2] 3575*4757b351SPierre Pronchery mov w15,v4.s[3] 3576*4757b351SPierre Pronchery10: 3577*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3578*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 3579*4757b351SPierre Pronchery eor w6,w14,w15 3580*4757b351SPierre Pronchery eor w9,w7,w13 3581*4757b351SPierre Pronchery eor w6,w6,w9 3582*4757b351SPierre Pronchery movi v1.16b,#64 3583*4757b351SPierre Pronchery movi v2.16b,#128 3584*4757b351SPierre Pronchery movi v3.16b,#192 3585*4757b351SPierre Pronchery mov v0.s[0],w6 3586*4757b351SPierre Pronchery 3587*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3588*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 
3589*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3590*4757b351SPierre Pronchery 3591*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3592*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3593*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3594*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3595*4757b351SPierre Pronchery 3596*4757b351SPierre Pronchery mov w6,v0.s[0] 3597*4757b351SPierre Pronchery mov w7,v1.s[0] 3598*4757b351SPierre Pronchery mov w9,v2.s[0] 3599*4757b351SPierre Pronchery add w7,w6,w7 3600*4757b351SPierre Pronchery mov w6,v3.s[0] 3601*4757b351SPierre Pronchery add w7,w7,w9 3602*4757b351SPierre Pronchery add w7,w7,w6 3603*4757b351SPierre Pronchery 3604*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3605*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3606*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3607*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3608*4757b351SPierre Pronchery eor w12,w12,w6 3609*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 3610*4757b351SPierre Pronchery eor w6,w14,w15 3611*4757b351SPierre Pronchery eor w9,w12,w8 3612*4757b351SPierre Pronchery eor w6,w6,w9 3613*4757b351SPierre Pronchery movi v1.16b,#64 3614*4757b351SPierre Pronchery movi v2.16b,#128 3615*4757b351SPierre Pronchery movi v3.16b,#192 3616*4757b351SPierre Pronchery mov v0.s[0],w6 3617*4757b351SPierre Pronchery 3618*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3619*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3620*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3621*4757b351SPierre Pronchery 3622*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3623*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3624*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3625*4757b351SPierre Pronchery tbl 
v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3626*4757b351SPierre Pronchery 3627*4757b351SPierre Pronchery mov w6,v0.s[0] 3628*4757b351SPierre Pronchery mov w7,v1.s[0] 3629*4757b351SPierre Pronchery mov w9,v2.s[0] 3630*4757b351SPierre Pronchery add w7,w6,w7 3631*4757b351SPierre Pronchery mov w6,v3.s[0] 3632*4757b351SPierre Pronchery add w7,w7,w9 3633*4757b351SPierre Pronchery add w7,w7,w6 3634*4757b351SPierre Pronchery 3635*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3636*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3637*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3638*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3639*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3640*4757b351SPierre Pronchery eor w13,w13,w6 3641*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 3642*4757b351SPierre Pronchery eor w6,w12,w13 3643*4757b351SPierre Pronchery eor w9,w7,w15 3644*4757b351SPierre Pronchery eor w6,w6,w9 3645*4757b351SPierre Pronchery movi v1.16b,#64 3646*4757b351SPierre Pronchery movi v2.16b,#128 3647*4757b351SPierre Pronchery movi v3.16b,#192 3648*4757b351SPierre Pronchery mov v0.s[0],w6 3649*4757b351SPierre Pronchery 3650*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3651*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3652*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3653*4757b351SPierre Pronchery 3654*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3655*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3656*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3657*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3658*4757b351SPierre Pronchery 3659*4757b351SPierre Pronchery mov w6,v0.s[0] 3660*4757b351SPierre Pronchery mov w7,v1.s[0] 3661*4757b351SPierre Pronchery mov w9,v2.s[0] 3662*4757b351SPierre Pronchery add w7,w6,w7 3663*4757b351SPierre Pronchery mov w6,v3.s[0] 3664*4757b351SPierre Pronchery add w7,w7,w9 
3665*4757b351SPierre Pronchery add w7,w7,w6 3666*4757b351SPierre Pronchery 3667*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3668*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3669*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3670*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3671*4757b351SPierre Pronchery eor w14,w14,w6 3672*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 3673*4757b351SPierre Pronchery eor w6,w12,w13 3674*4757b351SPierre Pronchery eor w9,w14,w8 3675*4757b351SPierre Pronchery eor w6,w6,w9 3676*4757b351SPierre Pronchery movi v1.16b,#64 3677*4757b351SPierre Pronchery movi v2.16b,#128 3678*4757b351SPierre Pronchery movi v3.16b,#192 3679*4757b351SPierre Pronchery mov v0.s[0],w6 3680*4757b351SPierre Pronchery 3681*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3682*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3683*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3684*4757b351SPierre Pronchery 3685*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3686*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3687*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3688*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3689*4757b351SPierre Pronchery 3690*4757b351SPierre Pronchery mov w6,v0.s[0] 3691*4757b351SPierre Pronchery mov w7,v1.s[0] 3692*4757b351SPierre Pronchery mov w9,v2.s[0] 3693*4757b351SPierre Pronchery add w7,w6,w7 3694*4757b351SPierre Pronchery mov w6,v3.s[0] 3695*4757b351SPierre Pronchery add w7,w7,w9 3696*4757b351SPierre Pronchery add w7,w7,w6 3697*4757b351SPierre Pronchery 3698*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3699*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3700*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3701*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3702*4757b351SPierre Pronchery eor w15,w15,w6 3703*4757b351SPierre Pronchery subs w11,w11,#1 
3704*4757b351SPierre Pronchery b.ne 10b 3705*4757b351SPierre Pronchery mov v4.s[0],w15 3706*4757b351SPierre Pronchery mov v4.s[1],w14 3707*4757b351SPierre Pronchery mov v4.s[2],w13 3708*4757b351SPierre Pronchery mov v4.s[3],w12 3709*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3710*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3711*4757b351SPierre Pronchery#endif 3712*4757b351SPierre Pronchery eor v4.16b, v4.16b, v9.16b 3713*4757b351SPierre Pronchery st1 {v4.4s},[x1],#16 3714*4757b351SPierre Pronchery 3715*4757b351SPierre Pronchery sub x26,x1,16 3716*4757b351SPierre Pronchery.loop_gb: 3717*4757b351SPierre Pronchery subs x29,x29,1 3718*4757b351SPierre Pronchery ldrb w7,[x26,x29] 3719*4757b351SPierre Pronchery ldrb w8,[x0,x29] 3720*4757b351SPierre Pronchery strb w8,[x26,x29] 3721*4757b351SPierre Pronchery strb w7,[x1,x29] 3722*4757b351SPierre Pronchery b.gt .loop_gb 3723*4757b351SPierre Pronchery ld1 {v4.4s}, [x26] 3724*4757b351SPierre Pronchery eor v4.16b, v4.16b, v10.16b 3725*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3726*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3727*4757b351SPierre Pronchery#endif 3728*4757b351SPierre Pronchery mov x10,x3 3729*4757b351SPierre Pronchery mov w11,#8 3730*4757b351SPierre Pronchery mov w12,v4.s[0] 3731*4757b351SPierre Pronchery mov w13,v4.s[1] 3732*4757b351SPierre Pronchery mov w14,v4.s[2] 3733*4757b351SPierre Pronchery mov w15,v4.s[3] 3734*4757b351SPierre Pronchery10: 3735*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3736*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 3737*4757b351SPierre Pronchery eor w6,w14,w15 3738*4757b351SPierre Pronchery eor w9,w7,w13 3739*4757b351SPierre Pronchery eor w6,w6,w9 3740*4757b351SPierre Pronchery movi v1.16b,#64 3741*4757b351SPierre Pronchery movi v2.16b,#128 3742*4757b351SPierre Pronchery movi v3.16b,#192 3743*4757b351SPierre Pronchery mov v0.s[0],w6 3744*4757b351SPierre Pronchery 3745*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3746*4757b351SPierre Pronchery sub 
v2.16b,v0.16b,v2.16b 3747*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3748*4757b351SPierre Pronchery 3749*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3750*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3751*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3752*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3753*4757b351SPierre Pronchery 3754*4757b351SPierre Pronchery mov w6,v0.s[0] 3755*4757b351SPierre Pronchery mov w7,v1.s[0] 3756*4757b351SPierre Pronchery mov w9,v2.s[0] 3757*4757b351SPierre Pronchery add w7,w6,w7 3758*4757b351SPierre Pronchery mov w6,v3.s[0] 3759*4757b351SPierre Pronchery add w7,w7,w9 3760*4757b351SPierre Pronchery add w7,w7,w6 3761*4757b351SPierre Pronchery 3762*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3763*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3764*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3765*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3766*4757b351SPierre Pronchery eor w12,w12,w6 3767*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 3768*4757b351SPierre Pronchery eor w6,w14,w15 3769*4757b351SPierre Pronchery eor w9,w12,w8 3770*4757b351SPierre Pronchery eor w6,w6,w9 3771*4757b351SPierre Pronchery movi v1.16b,#64 3772*4757b351SPierre Pronchery movi v2.16b,#128 3773*4757b351SPierre Pronchery movi v3.16b,#192 3774*4757b351SPierre Pronchery mov v0.s[0],w6 3775*4757b351SPierre Pronchery 3776*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3777*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3778*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3779*4757b351SPierre Pronchery 3780*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3781*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3782*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3783*4757b351SPierre Pronchery tbl 
v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3784*4757b351SPierre Pronchery 3785*4757b351SPierre Pronchery mov w6,v0.s[0] 3786*4757b351SPierre Pronchery mov w7,v1.s[0] 3787*4757b351SPierre Pronchery mov w9,v2.s[0] 3788*4757b351SPierre Pronchery add w7,w6,w7 3789*4757b351SPierre Pronchery mov w6,v3.s[0] 3790*4757b351SPierre Pronchery add w7,w7,w9 3791*4757b351SPierre Pronchery add w7,w7,w6 3792*4757b351SPierre Pronchery 3793*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3794*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3795*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3796*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3797*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3798*4757b351SPierre Pronchery eor w13,w13,w6 3799*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 3800*4757b351SPierre Pronchery eor w6,w12,w13 3801*4757b351SPierre Pronchery eor w9,w7,w15 3802*4757b351SPierre Pronchery eor w6,w6,w9 3803*4757b351SPierre Pronchery movi v1.16b,#64 3804*4757b351SPierre Pronchery movi v2.16b,#128 3805*4757b351SPierre Pronchery movi v3.16b,#192 3806*4757b351SPierre Pronchery mov v0.s[0],w6 3807*4757b351SPierre Pronchery 3808*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3809*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3810*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3811*4757b351SPierre Pronchery 3812*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3813*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3814*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3815*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3816*4757b351SPierre Pronchery 3817*4757b351SPierre Pronchery mov w6,v0.s[0] 3818*4757b351SPierre Pronchery mov w7,v1.s[0] 3819*4757b351SPierre Pronchery mov w9,v2.s[0] 3820*4757b351SPierre Pronchery add w7,w6,w7 3821*4757b351SPierre Pronchery mov w6,v3.s[0] 3822*4757b351SPierre Pronchery add w7,w7,w9 
3823*4757b351SPierre Pronchery add w7,w7,w6 3824*4757b351SPierre Pronchery 3825*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3826*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3827*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3828*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3829*4757b351SPierre Pronchery eor w14,w14,w6 3830*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 3831*4757b351SPierre Pronchery eor w6,w12,w13 3832*4757b351SPierre Pronchery eor w9,w14,w8 3833*4757b351SPierre Pronchery eor w6,w6,w9 3834*4757b351SPierre Pronchery movi v1.16b,#64 3835*4757b351SPierre Pronchery movi v2.16b,#128 3836*4757b351SPierre Pronchery movi v3.16b,#192 3837*4757b351SPierre Pronchery mov v0.s[0],w6 3838*4757b351SPierre Pronchery 3839*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3840*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3841*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3842*4757b351SPierre Pronchery 3843*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3844*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3845*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3846*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3847*4757b351SPierre Pronchery 3848*4757b351SPierre Pronchery mov w6,v0.s[0] 3849*4757b351SPierre Pronchery mov w7,v1.s[0] 3850*4757b351SPierre Pronchery mov w9,v2.s[0] 3851*4757b351SPierre Pronchery add w7,w6,w7 3852*4757b351SPierre Pronchery mov w6,v3.s[0] 3853*4757b351SPierre Pronchery add w7,w7,w9 3854*4757b351SPierre Pronchery add w7,w7,w6 3855*4757b351SPierre Pronchery 3856*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3857*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3858*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3859*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3860*4757b351SPierre Pronchery eor w15,w15,w6 3861*4757b351SPierre Pronchery subs w11,w11,#1 
3862*4757b351SPierre Pronchery b.ne 10b 3863*4757b351SPierre Pronchery mov v4.s[0],w15 3864*4757b351SPierre Pronchery mov v4.s[1],w14 3865*4757b351SPierre Pronchery mov v4.s[2],w13 3866*4757b351SPierre Pronchery mov v4.s[3],w12 3867*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3868*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 3869*4757b351SPierre Pronchery#endif 3870*4757b351SPierre Pronchery eor v4.16b, v4.16b, v10.16b 3871*4757b351SPierre Pronchery st1 {v4.4s}, [x26] 3872*4757b351SPierre Pronchery.return_gb: 3873*4757b351SPierre Pronchery ldp d14, d15, [sp], #0x10 3874*4757b351SPierre Pronchery ldp d12, d13, [sp], #0x10 3875*4757b351SPierre Pronchery ldp d10, d11, [sp], #0x10 3876*4757b351SPierre Pronchery ldp d8, d9, [sp], #0x10 3877*4757b351SPierre Pronchery ldp x29, x30, [sp], #0x10 3878*4757b351SPierre Pronchery ldp x27, x28, [sp], #0x10 3879*4757b351SPierre Pronchery ldp x25, x26, [sp], #0x10 3880*4757b351SPierre Pronchery ldp x23, x24, [sp], #0x10 3881*4757b351SPierre Pronchery ldp x21, x22, [sp], #0x10 3882*4757b351SPierre Pronchery ldp x19, x20, [sp], #0x10 3883*4757b351SPierre Pronchery ldp x17, x18, [sp], #0x10 3884*4757b351SPierre Pronchery ldp x15, x16, [sp], #0x10 3885*4757b351SPierre Pronchery AARCH64_VALIDATE_LINK_REGISTER 3886*4757b351SPierre Pronchery ret 3887*4757b351SPierre Pronchery.size vpsm4_xts_encrypt_gb,.-vpsm4_xts_encrypt_gb 3888*4757b351SPierre Pronchery.globl vpsm4_xts_encrypt 3889*4757b351SPierre Pronchery.type vpsm4_xts_encrypt,%function 3890*4757b351SPierre Pronchery.align 5 3891*4757b351SPierre Proncheryvpsm4_xts_encrypt: 3892*4757b351SPierre Pronchery AARCH64_SIGN_LINK_REGISTER 3893*4757b351SPierre Pronchery stp x15, x16, [sp, #-0x10]! 3894*4757b351SPierre Pronchery stp x17, x18, [sp, #-0x10]! 3895*4757b351SPierre Pronchery stp x19, x20, [sp, #-0x10]! 3896*4757b351SPierre Pronchery stp x21, x22, [sp, #-0x10]! 3897*4757b351SPierre Pronchery stp x23, x24, [sp, #-0x10]! 3898*4757b351SPierre Pronchery stp x25, x26, [sp, #-0x10]! 
3899*4757b351SPierre Pronchery stp x27, x28, [sp, #-0x10]! 3900*4757b351SPierre Pronchery stp x29, x30, [sp, #-0x10]! 3901*4757b351SPierre Pronchery stp d8, d9, [sp, #-0x10]! 3902*4757b351SPierre Pronchery stp d10, d11, [sp, #-0x10]! 3903*4757b351SPierre Pronchery stp d12, d13, [sp, #-0x10]! 3904*4757b351SPierre Pronchery stp d14, d15, [sp, #-0x10]! 3905*4757b351SPierre Pronchery mov x26,x3 3906*4757b351SPierre Pronchery mov x27,x4 3907*4757b351SPierre Pronchery mov w28,w6 3908*4757b351SPierre Pronchery ld1 {v8.4s}, [x5] 3909*4757b351SPierre Pronchery mov x3,x27 3910*4757b351SPierre Pronchery adrp x10,.Lsbox 3911*4757b351SPierre Pronchery add x10,x10,#:lo12:.Lsbox 3912*4757b351SPierre Pronchery ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64 3913*4757b351SPierre Pronchery ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64 3914*4757b351SPierre Pronchery ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64 3915*4757b351SPierre Pronchery ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10] 3916*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 3917*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 3918*4757b351SPierre Pronchery#endif 3919*4757b351SPierre Pronchery mov x10,x3 3920*4757b351SPierre Pronchery mov w11,#8 3921*4757b351SPierre Pronchery mov w12,v8.s[0] 3922*4757b351SPierre Pronchery mov w13,v8.s[1] 3923*4757b351SPierre Pronchery mov w14,v8.s[2] 3924*4757b351SPierre Pronchery mov w15,v8.s[3] 3925*4757b351SPierre Pronchery10: 3926*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3927*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 3928*4757b351SPierre Pronchery eor w6,w14,w15 3929*4757b351SPierre Pronchery eor w9,w7,w13 3930*4757b351SPierre Pronchery eor w6,w6,w9 3931*4757b351SPierre Pronchery movi v1.16b,#64 3932*4757b351SPierre Pronchery movi v2.16b,#128 3933*4757b351SPierre Pronchery movi v3.16b,#192 3934*4757b351SPierre Pronchery mov v0.s[0],w6 3935*4757b351SPierre Pronchery 3936*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3937*4757b351SPierre Pronchery sub 
v2.16b,v0.16b,v2.16b 3938*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3939*4757b351SPierre Pronchery 3940*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3941*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3942*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3943*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3944*4757b351SPierre Pronchery 3945*4757b351SPierre Pronchery mov w6,v0.s[0] 3946*4757b351SPierre Pronchery mov w7,v1.s[0] 3947*4757b351SPierre Pronchery mov w9,v2.s[0] 3948*4757b351SPierre Pronchery add w7,w6,w7 3949*4757b351SPierre Pronchery mov w6,v3.s[0] 3950*4757b351SPierre Pronchery add w7,w7,w9 3951*4757b351SPierre Pronchery add w7,w7,w6 3952*4757b351SPierre Pronchery 3953*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3954*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3955*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3956*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3957*4757b351SPierre Pronchery eor w12,w12,w6 3958*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 3959*4757b351SPierre Pronchery eor w6,w14,w15 3960*4757b351SPierre Pronchery eor w9,w12,w8 3961*4757b351SPierre Pronchery eor w6,w6,w9 3962*4757b351SPierre Pronchery movi v1.16b,#64 3963*4757b351SPierre Pronchery movi v2.16b,#128 3964*4757b351SPierre Pronchery movi v3.16b,#192 3965*4757b351SPierre Pronchery mov v0.s[0],w6 3966*4757b351SPierre Pronchery 3967*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 3968*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 3969*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 3970*4757b351SPierre Pronchery 3971*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 3972*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 3973*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 3974*4757b351SPierre Pronchery tbl 
v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 3975*4757b351SPierre Pronchery 3976*4757b351SPierre Pronchery mov w6,v0.s[0] 3977*4757b351SPierre Pronchery mov w7,v1.s[0] 3978*4757b351SPierre Pronchery mov w9,v2.s[0] 3979*4757b351SPierre Pronchery add w7,w6,w7 3980*4757b351SPierre Pronchery mov w6,v3.s[0] 3981*4757b351SPierre Pronchery add w7,w7,w9 3982*4757b351SPierre Pronchery add w7,w7,w6 3983*4757b351SPierre Pronchery 3984*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 3985*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 3986*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 3987*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 3988*4757b351SPierre Pronchery ldp w7,w8,[x10],8 3989*4757b351SPierre Pronchery eor w13,w13,w6 3990*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 3991*4757b351SPierre Pronchery eor w6,w12,w13 3992*4757b351SPierre Pronchery eor w9,w7,w15 3993*4757b351SPierre Pronchery eor w6,w6,w9 3994*4757b351SPierre Pronchery movi v1.16b,#64 3995*4757b351SPierre Pronchery movi v2.16b,#128 3996*4757b351SPierre Pronchery movi v3.16b,#192 3997*4757b351SPierre Pronchery mov v0.s[0],w6 3998*4757b351SPierre Pronchery 3999*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4000*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4001*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4002*4757b351SPierre Pronchery 4003*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4004*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4005*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4006*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4007*4757b351SPierre Pronchery 4008*4757b351SPierre Pronchery mov w6,v0.s[0] 4009*4757b351SPierre Pronchery mov w7,v1.s[0] 4010*4757b351SPierre Pronchery mov w9,v2.s[0] 4011*4757b351SPierre Pronchery add w7,w6,w7 4012*4757b351SPierre Pronchery mov w6,v3.s[0] 4013*4757b351SPierre Pronchery add w7,w7,w9 
4014*4757b351SPierre Pronchery add w7,w7,w6 4015*4757b351SPierre Pronchery 4016*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4017*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4018*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4019*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4020*4757b351SPierre Pronchery eor w14,w14,w6 4021*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 4022*4757b351SPierre Pronchery eor w6,w12,w13 4023*4757b351SPierre Pronchery eor w9,w14,w8 4024*4757b351SPierre Pronchery eor w6,w6,w9 4025*4757b351SPierre Pronchery movi v1.16b,#64 4026*4757b351SPierre Pronchery movi v2.16b,#128 4027*4757b351SPierre Pronchery movi v3.16b,#192 4028*4757b351SPierre Pronchery mov v0.s[0],w6 4029*4757b351SPierre Pronchery 4030*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4031*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4032*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4033*4757b351SPierre Pronchery 4034*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4035*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4036*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4037*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4038*4757b351SPierre Pronchery 4039*4757b351SPierre Pronchery mov w6,v0.s[0] 4040*4757b351SPierre Pronchery mov w7,v1.s[0] 4041*4757b351SPierre Pronchery mov w9,v2.s[0] 4042*4757b351SPierre Pronchery add w7,w6,w7 4043*4757b351SPierre Pronchery mov w6,v3.s[0] 4044*4757b351SPierre Pronchery add w7,w7,w9 4045*4757b351SPierre Pronchery add w7,w7,w6 4046*4757b351SPierre Pronchery 4047*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4048*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4049*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4050*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4051*4757b351SPierre Pronchery eor w15,w15,w6 4052*4757b351SPierre Pronchery subs w11,w11,#1 
4053*4757b351SPierre Pronchery b.ne 10b 4054*4757b351SPierre Pronchery mov v8.s[0],w15 4055*4757b351SPierre Pronchery mov v8.s[1],w14 4056*4757b351SPierre Pronchery mov v8.s[2],w13 4057*4757b351SPierre Pronchery mov v8.s[3],w12 4058*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4059*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4060*4757b351SPierre Pronchery#endif 4061*4757b351SPierre Pronchery mov x3,x26 4062*4757b351SPierre Pronchery and x29,x2,#0x0F 4063*4757b351SPierre Pronchery // convert length into blocks 4064*4757b351SPierre Pronchery lsr x2,x2,4 4065*4757b351SPierre Pronchery cmp x2,#1 4066*4757b351SPierre Pronchery b.lt .return 4067*4757b351SPierre Pronchery 4068*4757b351SPierre Pronchery cmp x29,0 4069*4757b351SPierre Pronchery // If the encryption/decryption Length is N times of 16, 4070*4757b351SPierre Pronchery // the all blocks are encrypted/decrypted in .xts_encrypt_blocks 4071*4757b351SPierre Pronchery b.eq .xts_encrypt_blocks 4072*4757b351SPierre Pronchery 4073*4757b351SPierre Pronchery // If the encryption/decryption length is not N times of 16, 4074*4757b351SPierre Pronchery // the last two blocks are encrypted/decrypted in .last_2blks_tweak or .only_2blks_tweak 4075*4757b351SPierre Pronchery // the other blocks are encrypted/decrypted in .xts_encrypt_blocks 4076*4757b351SPierre Pronchery subs x2,x2,#1 4077*4757b351SPierre Pronchery b.eq .only_2blks_tweak 4078*4757b351SPierre Pronchery.xts_encrypt_blocks: 4079*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4080*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4081*4757b351SPierre Pronchery#endif 4082*4757b351SPierre Pronchery mov x12,v8.d[0] 4083*4757b351SPierre Pronchery mov x13,v8.d[1] 4084*4757b351SPierre Pronchery mov w7,0x87 4085*4757b351SPierre Pronchery extr x9,x13,x13,#32 4086*4757b351SPierre Pronchery extr x15,x13,x12,#63 4087*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4088*4757b351SPierre Pronchery eor x14,x8,x12,lsl#1 4089*4757b351SPierre Pronchery mov w7,0x87 4090*4757b351SPierre 
Pronchery extr x9,x15,x15,#32 4091*4757b351SPierre Pronchery extr x17,x15,x14,#63 4092*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4093*4757b351SPierre Pronchery eor x16,x8,x14,lsl#1 4094*4757b351SPierre Pronchery mov w7,0x87 4095*4757b351SPierre Pronchery extr x9,x17,x17,#32 4096*4757b351SPierre Pronchery extr x19,x17,x16,#63 4097*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4098*4757b351SPierre Pronchery eor x18,x8,x16,lsl#1 4099*4757b351SPierre Pronchery mov w7,0x87 4100*4757b351SPierre Pronchery extr x9,x19,x19,#32 4101*4757b351SPierre Pronchery extr x21,x19,x18,#63 4102*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4103*4757b351SPierre Pronchery eor x20,x8,x18,lsl#1 4104*4757b351SPierre Pronchery mov w7,0x87 4105*4757b351SPierre Pronchery extr x9,x21,x21,#32 4106*4757b351SPierre Pronchery extr x23,x21,x20,#63 4107*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4108*4757b351SPierre Pronchery eor x22,x8,x20,lsl#1 4109*4757b351SPierre Pronchery mov w7,0x87 4110*4757b351SPierre Pronchery extr x9,x23,x23,#32 4111*4757b351SPierre Pronchery extr x25,x23,x22,#63 4112*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4113*4757b351SPierre Pronchery eor x24,x8,x22,lsl#1 4114*4757b351SPierre Pronchery mov w7,0x87 4115*4757b351SPierre Pronchery extr x9,x25,x25,#32 4116*4757b351SPierre Pronchery extr x27,x25,x24,#63 4117*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4118*4757b351SPierre Pronchery eor x26,x8,x24,lsl#1 4119*4757b351SPierre Pronchery.Lxts_8_blocks_process: 4120*4757b351SPierre Pronchery cmp x2,#8 4121*4757b351SPierre Pronchery b.lt .Lxts_4_blocks_process 4122*4757b351SPierre Pronchery mov v0.d[0],x12 4123*4757b351SPierre Pronchery mov v0.d[1],x13 4124*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4125*4757b351SPierre Pronchery rev32 v0.16b,v0.16b 4126*4757b351SPierre Pronchery#endif 4127*4757b351SPierre Pronchery mov v1.d[0],x14 4128*4757b351SPierre Pronchery mov v1.d[1],x15 4129*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4130*4757b351SPierre Pronchery rev32 
v1.16b,v1.16b 4131*4757b351SPierre Pronchery#endif 4132*4757b351SPierre Pronchery mov v2.d[0],x16 4133*4757b351SPierre Pronchery mov v2.d[1],x17 4134*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4135*4757b351SPierre Pronchery rev32 v2.16b,v2.16b 4136*4757b351SPierre Pronchery#endif 4137*4757b351SPierre Pronchery mov v3.d[0],x18 4138*4757b351SPierre Pronchery mov v3.d[1],x19 4139*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4140*4757b351SPierre Pronchery rev32 v3.16b,v3.16b 4141*4757b351SPierre Pronchery#endif 4142*4757b351SPierre Pronchery mov v12.d[0],x20 4143*4757b351SPierre Pronchery mov v12.d[1],x21 4144*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4145*4757b351SPierre Pronchery rev32 v12.16b,v12.16b 4146*4757b351SPierre Pronchery#endif 4147*4757b351SPierre Pronchery mov v13.d[0],x22 4148*4757b351SPierre Pronchery mov v13.d[1],x23 4149*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4150*4757b351SPierre Pronchery rev32 v13.16b,v13.16b 4151*4757b351SPierre Pronchery#endif 4152*4757b351SPierre Pronchery mov v14.d[0],x24 4153*4757b351SPierre Pronchery mov v14.d[1],x25 4154*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4155*4757b351SPierre Pronchery rev32 v14.16b,v14.16b 4156*4757b351SPierre Pronchery#endif 4157*4757b351SPierre Pronchery mov v15.d[0],x26 4158*4757b351SPierre Pronchery mov v15.d[1],x27 4159*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4160*4757b351SPierre Pronchery rev32 v15.16b,v15.16b 4161*4757b351SPierre Pronchery#endif 4162*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 4163*4757b351SPierre Pronchery eor v4.16b, v4.16b, v0.16b 4164*4757b351SPierre Pronchery eor v5.16b, v5.16b, v1.16b 4165*4757b351SPierre Pronchery eor v6.16b, v6.16b, v2.16b 4166*4757b351SPierre Pronchery eor v7.16b, v7.16b, v3.16b 4167*4757b351SPierre Pronchery ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 4168*4757b351SPierre Pronchery eor v8.16b, v8.16b, v12.16b 4169*4757b351SPierre Pronchery eor v9.16b, v9.16b, v13.16b 4170*4757b351SPierre Pronchery eor 
v10.16b, v10.16b, v14.16b 4171*4757b351SPierre Pronchery eor v11.16b, v11.16b, v15.16b 4172*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4173*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4174*4757b351SPierre Pronchery#endif 4175*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4176*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 4177*4757b351SPierre Pronchery#endif 4178*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4179*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 4180*4757b351SPierre Pronchery#endif 4181*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4182*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 4183*4757b351SPierre Pronchery#endif 4184*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4185*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4186*4757b351SPierre Pronchery#endif 4187*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4188*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 4189*4757b351SPierre Pronchery#endif 4190*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4191*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 4192*4757b351SPierre Pronchery#endif 4193*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4194*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 4195*4757b351SPierre Pronchery#endif 4196*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 4197*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 4198*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 4199*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 4200*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 4201*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 4202*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 4203*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 4204*4757b351SPierre Pronchery zip1 v0.4s,v8.4s,v9.4s 4205*4757b351SPierre Pronchery zip2 v1.4s,v8.4s,v9.4s 4206*4757b351SPierre Pronchery zip1 v2.4s,v10.4s,v11.4s 4207*4757b351SPierre Pronchery zip2 v3.4s,v10.4s,v11.4s 4208*4757b351SPierre Pronchery zip1 v8.2d,v0.2d,v2.2d 4209*4757b351SPierre Pronchery zip2 v9.2d,v0.2d,v2.2d 4210*4757b351SPierre 
Pronchery zip1 v10.2d,v1.2d,v3.2d 4211*4757b351SPierre Pronchery zip2 v11.2d,v1.2d,v3.2d 4212*4757b351SPierre Pronchery bl _vpsm4_enc_8blks 4213*4757b351SPierre Pronchery zip1 v8.4s,v0.4s,v1.4s 4214*4757b351SPierre Pronchery zip2 v9.4s,v0.4s,v1.4s 4215*4757b351SPierre Pronchery zip1 v10.4s,v2.4s,v3.4s 4216*4757b351SPierre Pronchery zip2 v11.4s,v2.4s,v3.4s 4217*4757b351SPierre Pronchery zip1 v0.2d,v8.2d,v10.2d 4218*4757b351SPierre Pronchery zip2 v1.2d,v8.2d,v10.2d 4219*4757b351SPierre Pronchery zip1 v2.2d,v9.2d,v11.2d 4220*4757b351SPierre Pronchery zip2 v3.2d,v9.2d,v11.2d 4221*4757b351SPierre Pronchery zip1 v8.4s,v4.4s,v5.4s 4222*4757b351SPierre Pronchery zip2 v9.4s,v4.4s,v5.4s 4223*4757b351SPierre Pronchery zip1 v10.4s,v6.4s,v7.4s 4224*4757b351SPierre Pronchery zip2 v11.4s,v6.4s,v7.4s 4225*4757b351SPierre Pronchery zip1 v4.2d,v8.2d,v10.2d 4226*4757b351SPierre Pronchery zip2 v5.2d,v8.2d,v10.2d 4227*4757b351SPierre Pronchery zip1 v6.2d,v9.2d,v11.2d 4228*4757b351SPierre Pronchery zip2 v7.2d,v9.2d,v11.2d 4229*4757b351SPierre Pronchery mov v12.d[0],x12 4230*4757b351SPierre Pronchery mov v12.d[1],x13 4231*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4232*4757b351SPierre Pronchery rev32 v12.16b,v12.16b 4233*4757b351SPierre Pronchery#endif 4234*4757b351SPierre Pronchery mov w7,0x87 4235*4757b351SPierre Pronchery extr x9,x27,x27,#32 4236*4757b351SPierre Pronchery extr x13,x27,x26,#63 4237*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4238*4757b351SPierre Pronchery eor x12,x8,x26,lsl#1 4239*4757b351SPierre Pronchery mov v13.d[0],x14 4240*4757b351SPierre Pronchery mov v13.d[1],x15 4241*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4242*4757b351SPierre Pronchery rev32 v13.16b,v13.16b 4243*4757b351SPierre Pronchery#endif 4244*4757b351SPierre Pronchery mov w7,0x87 4245*4757b351SPierre Pronchery extr x9,x13,x13,#32 4246*4757b351SPierre Pronchery extr x15,x13,x12,#63 4247*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4248*4757b351SPierre Pronchery eor x14,x8,x12,lsl#1 
4249*4757b351SPierre Pronchery mov v14.d[0],x16 4250*4757b351SPierre Pronchery mov v14.d[1],x17 4251*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4252*4757b351SPierre Pronchery rev32 v14.16b,v14.16b 4253*4757b351SPierre Pronchery#endif 4254*4757b351SPierre Pronchery mov w7,0x87 4255*4757b351SPierre Pronchery extr x9,x15,x15,#32 4256*4757b351SPierre Pronchery extr x17,x15,x14,#63 4257*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4258*4757b351SPierre Pronchery eor x16,x8,x14,lsl#1 4259*4757b351SPierre Pronchery mov v15.d[0],x18 4260*4757b351SPierre Pronchery mov v15.d[1],x19 4261*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4262*4757b351SPierre Pronchery rev32 v15.16b,v15.16b 4263*4757b351SPierre Pronchery#endif 4264*4757b351SPierre Pronchery mov w7,0x87 4265*4757b351SPierre Pronchery extr x9,x17,x17,#32 4266*4757b351SPierre Pronchery extr x19,x17,x16,#63 4267*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4268*4757b351SPierre Pronchery eor x18,x8,x16,lsl#1 4269*4757b351SPierre Pronchery mov v8.d[0],x20 4270*4757b351SPierre Pronchery mov v8.d[1],x21 4271*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4272*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4273*4757b351SPierre Pronchery#endif 4274*4757b351SPierre Pronchery mov w7,0x87 4275*4757b351SPierre Pronchery extr x9,x19,x19,#32 4276*4757b351SPierre Pronchery extr x21,x19,x18,#63 4277*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4278*4757b351SPierre Pronchery eor x20,x8,x18,lsl#1 4279*4757b351SPierre Pronchery mov v9.d[0],x22 4280*4757b351SPierre Pronchery mov v9.d[1],x23 4281*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4282*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 4283*4757b351SPierre Pronchery#endif 4284*4757b351SPierre Pronchery mov w7,0x87 4285*4757b351SPierre Pronchery extr x9,x21,x21,#32 4286*4757b351SPierre Pronchery extr x23,x21,x20,#63 4287*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4288*4757b351SPierre Pronchery eor x22,x8,x20,lsl#1 4289*4757b351SPierre Pronchery mov v10.d[0],x24 
4290*4757b351SPierre Pronchery mov v10.d[1],x25 4291*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4292*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 4293*4757b351SPierre Pronchery#endif 4294*4757b351SPierre Pronchery mov w7,0x87 4295*4757b351SPierre Pronchery extr x9,x23,x23,#32 4296*4757b351SPierre Pronchery extr x25,x23,x22,#63 4297*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4298*4757b351SPierre Pronchery eor x24,x8,x22,lsl#1 4299*4757b351SPierre Pronchery mov v11.d[0],x26 4300*4757b351SPierre Pronchery mov v11.d[1],x27 4301*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4302*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 4303*4757b351SPierre Pronchery#endif 4304*4757b351SPierre Pronchery mov w7,0x87 4305*4757b351SPierre Pronchery extr x9,x25,x25,#32 4306*4757b351SPierre Pronchery extr x27,x25,x24,#63 4307*4757b351SPierre Pronchery and w8,w7,w9,asr#31 4308*4757b351SPierre Pronchery eor x26,x8,x24,lsl#1 4309*4757b351SPierre Pronchery eor v0.16b, v0.16b, v12.16b 4310*4757b351SPierre Pronchery eor v1.16b, v1.16b, v13.16b 4311*4757b351SPierre Pronchery eor v2.16b, v2.16b, v14.16b 4312*4757b351SPierre Pronchery eor v3.16b, v3.16b, v15.16b 4313*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 4314*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 4315*4757b351SPierre Pronchery eor v6.16b, v6.16b, v10.16b 4316*4757b351SPierre Pronchery eor v7.16b, v7.16b, v11.16b 4317*4757b351SPierre Pronchery 4318*4757b351SPierre Pronchery // save the last tweak 4319*4757b351SPierre Pronchery st1 {v11.4s},[x5] 4320*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 4321*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64 4322*4757b351SPierre Pronchery subs x2,x2,#8 4323*4757b351SPierre Pronchery b.gt .Lxts_8_blocks_process 4324*4757b351SPierre Pronchery b 100f 4325*4757b351SPierre Pronchery.Lxts_4_blocks_process: 4326*4757b351SPierre Pronchery mov v8.d[0],x12 4327*4757b351SPierre Pronchery mov v8.d[1],x13 4328*4757b351SPierre Pronchery#ifdef 
__AARCH64EB__ 4329*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4330*4757b351SPierre Pronchery#endif 4331*4757b351SPierre Pronchery mov v9.d[0],x14 4332*4757b351SPierre Pronchery mov v9.d[1],x15 4333*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4334*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 4335*4757b351SPierre Pronchery#endif 4336*4757b351SPierre Pronchery mov v10.d[0],x16 4337*4757b351SPierre Pronchery mov v10.d[1],x17 4338*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4339*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 4340*4757b351SPierre Pronchery#endif 4341*4757b351SPierre Pronchery mov v11.d[0],x18 4342*4757b351SPierre Pronchery mov v11.d[1],x19 4343*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4344*4757b351SPierre Pronchery rev32 v11.16b,v11.16b 4345*4757b351SPierre Pronchery#endif 4346*4757b351SPierre Pronchery cmp x2,#4 4347*4757b351SPierre Pronchery b.lt 1f 4348*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 4349*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 4350*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 4351*4757b351SPierre Pronchery eor v6.16b, v6.16b, v10.16b 4352*4757b351SPierre Pronchery eor v7.16b, v7.16b, v11.16b 4353*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4354*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4355*4757b351SPierre Pronchery#endif 4356*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4357*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 4358*4757b351SPierre Pronchery#endif 4359*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4360*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 4361*4757b351SPierre Pronchery#endif 4362*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4363*4757b351SPierre Pronchery rev32 v7.16b,v7.16b 4364*4757b351SPierre Pronchery#endif 4365*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 4366*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 4367*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 4368*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 4369*4757b351SPierre Pronchery 
zip1 v4.2d,v0.2d,v2.2d 4370*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 4371*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 4372*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 4373*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 4374*4757b351SPierre Pronchery zip1 v4.4s,v0.4s,v1.4s 4375*4757b351SPierre Pronchery zip2 v5.4s,v0.4s,v1.4s 4376*4757b351SPierre Pronchery zip1 v6.4s,v2.4s,v3.4s 4377*4757b351SPierre Pronchery zip2 v7.4s,v2.4s,v3.4s 4378*4757b351SPierre Pronchery zip1 v0.2d,v4.2d,v6.2d 4379*4757b351SPierre Pronchery zip2 v1.2d,v4.2d,v6.2d 4380*4757b351SPierre Pronchery zip1 v2.2d,v5.2d,v7.2d 4381*4757b351SPierre Pronchery zip2 v3.2d,v5.2d,v7.2d 4382*4757b351SPierre Pronchery eor v0.16b, v0.16b, v8.16b 4383*4757b351SPierre Pronchery eor v1.16b, v1.16b, v9.16b 4384*4757b351SPierre Pronchery eor v2.16b, v2.16b, v10.16b 4385*4757b351SPierre Pronchery eor v3.16b, v3.16b, v11.16b 4386*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64 4387*4757b351SPierre Pronchery sub x2,x2,#4 4388*4757b351SPierre Pronchery mov v8.d[0],x20 4389*4757b351SPierre Pronchery mov v8.d[1],x21 4390*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4391*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4392*4757b351SPierre Pronchery#endif 4393*4757b351SPierre Pronchery mov v9.d[0],x22 4394*4757b351SPierre Pronchery mov v9.d[1],x23 4395*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4396*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 4397*4757b351SPierre Pronchery#endif 4398*4757b351SPierre Pronchery mov v10.d[0],x24 4399*4757b351SPierre Pronchery mov v10.d[1],x25 4400*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4401*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 4402*4757b351SPierre Pronchery#endif 4403*4757b351SPierre Pronchery // save the last tweak 4404*4757b351SPierre Pronchery st1 {v11.4s},[x5] 4405*4757b351SPierre Pronchery1: 4406*4757b351SPierre Pronchery // process last block 4407*4757b351SPierre Pronchery cmp x2,#1 4408*4757b351SPierre Pronchery b.lt 100f 
4409*4757b351SPierre Pronchery b.gt 1f 4410*4757b351SPierre Pronchery ld1 {v4.4s},[x0],#16 4411*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 4412*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4413*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4414*4757b351SPierre Pronchery#endif 4415*4757b351SPierre Pronchery mov x10,x3 4416*4757b351SPierre Pronchery mov w11,#8 4417*4757b351SPierre Pronchery mov w12,v4.s[0] 4418*4757b351SPierre Pronchery mov w13,v4.s[1] 4419*4757b351SPierre Pronchery mov w14,v4.s[2] 4420*4757b351SPierre Pronchery mov w15,v4.s[3] 4421*4757b351SPierre Pronchery10: 4422*4757b351SPierre Pronchery ldp w7,w8,[x10],8 4423*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 4424*4757b351SPierre Pronchery eor w6,w14,w15 4425*4757b351SPierre Pronchery eor w9,w7,w13 4426*4757b351SPierre Pronchery eor w6,w6,w9 4427*4757b351SPierre Pronchery movi v1.16b,#64 4428*4757b351SPierre Pronchery movi v2.16b,#128 4429*4757b351SPierre Pronchery movi v3.16b,#192 4430*4757b351SPierre Pronchery mov v0.s[0],w6 4431*4757b351SPierre Pronchery 4432*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4433*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4434*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4435*4757b351SPierre Pronchery 4436*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4437*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4438*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4439*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4440*4757b351SPierre Pronchery 4441*4757b351SPierre Pronchery mov w6,v0.s[0] 4442*4757b351SPierre Pronchery mov w7,v1.s[0] 4443*4757b351SPierre Pronchery mov w9,v2.s[0] 4444*4757b351SPierre Pronchery add w7,w6,w7 4445*4757b351SPierre Pronchery mov w6,v3.s[0] 4446*4757b351SPierre Pronchery add w7,w7,w9 4447*4757b351SPierre Pronchery add w7,w7,w6 4448*4757b351SPierre Pronchery 4449*4757b351SPierre 
Pronchery eor w6,w7,w7,ror #32-2 4450*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4451*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4452*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4453*4757b351SPierre Pronchery eor w12,w12,w6 4454*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 4455*4757b351SPierre Pronchery eor w6,w14,w15 4456*4757b351SPierre Pronchery eor w9,w12,w8 4457*4757b351SPierre Pronchery eor w6,w6,w9 4458*4757b351SPierre Pronchery movi v1.16b,#64 4459*4757b351SPierre Pronchery movi v2.16b,#128 4460*4757b351SPierre Pronchery movi v3.16b,#192 4461*4757b351SPierre Pronchery mov v0.s[0],w6 4462*4757b351SPierre Pronchery 4463*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4464*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4465*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4466*4757b351SPierre Pronchery 4467*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4468*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4469*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4470*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4471*4757b351SPierre Pronchery 4472*4757b351SPierre Pronchery mov w6,v0.s[0] 4473*4757b351SPierre Pronchery mov w7,v1.s[0] 4474*4757b351SPierre Pronchery mov w9,v2.s[0] 4475*4757b351SPierre Pronchery add w7,w6,w7 4476*4757b351SPierre Pronchery mov w6,v3.s[0] 4477*4757b351SPierre Pronchery add w7,w7,w9 4478*4757b351SPierre Pronchery add w7,w7,w6 4479*4757b351SPierre Pronchery 4480*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4481*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4482*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4483*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4484*4757b351SPierre Pronchery ldp w7,w8,[x10],8 4485*4757b351SPierre Pronchery eor w13,w13,w6 4486*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 4487*4757b351SPierre Pronchery eor w6,w12,w13 
4488*4757b351SPierre Pronchery eor w9,w7,w15 4489*4757b351SPierre Pronchery eor w6,w6,w9 4490*4757b351SPierre Pronchery movi v1.16b,#64 4491*4757b351SPierre Pronchery movi v2.16b,#128 4492*4757b351SPierre Pronchery movi v3.16b,#192 4493*4757b351SPierre Pronchery mov v0.s[0],w6 4494*4757b351SPierre Pronchery 4495*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4496*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4497*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4498*4757b351SPierre Pronchery 4499*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4500*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4501*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4502*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4503*4757b351SPierre Pronchery 4504*4757b351SPierre Pronchery mov w6,v0.s[0] 4505*4757b351SPierre Pronchery mov w7,v1.s[0] 4506*4757b351SPierre Pronchery mov w9,v2.s[0] 4507*4757b351SPierre Pronchery add w7,w6,w7 4508*4757b351SPierre Pronchery mov w6,v3.s[0] 4509*4757b351SPierre Pronchery add w7,w7,w9 4510*4757b351SPierre Pronchery add w7,w7,w6 4511*4757b351SPierre Pronchery 4512*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4513*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4514*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4515*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4516*4757b351SPierre Pronchery eor w14,w14,w6 4517*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 4518*4757b351SPierre Pronchery eor w6,w12,w13 4519*4757b351SPierre Pronchery eor w9,w14,w8 4520*4757b351SPierre Pronchery eor w6,w6,w9 4521*4757b351SPierre Pronchery movi v1.16b,#64 4522*4757b351SPierre Pronchery movi v2.16b,#128 4523*4757b351SPierre Pronchery movi v3.16b,#192 4524*4757b351SPierre Pronchery mov v0.s[0],w6 4525*4757b351SPierre Pronchery 4526*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4527*4757b351SPierre Pronchery sub 
v2.16b,v0.16b,v2.16b 4528*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4529*4757b351SPierre Pronchery 4530*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4531*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4532*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4533*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4534*4757b351SPierre Pronchery 4535*4757b351SPierre Pronchery mov w6,v0.s[0] 4536*4757b351SPierre Pronchery mov w7,v1.s[0] 4537*4757b351SPierre Pronchery mov w9,v2.s[0] 4538*4757b351SPierre Pronchery add w7,w6,w7 4539*4757b351SPierre Pronchery mov w6,v3.s[0] 4540*4757b351SPierre Pronchery add w7,w7,w9 4541*4757b351SPierre Pronchery add w7,w7,w6 4542*4757b351SPierre Pronchery 4543*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4544*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4545*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4546*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4547*4757b351SPierre Pronchery eor w15,w15,w6 4548*4757b351SPierre Pronchery subs w11,w11,#1 4549*4757b351SPierre Pronchery b.ne 10b 4550*4757b351SPierre Pronchery mov v4.s[0],w15 4551*4757b351SPierre Pronchery mov v4.s[1],w14 4552*4757b351SPierre Pronchery mov v4.s[2],w13 4553*4757b351SPierre Pronchery mov v4.s[3],w12 4554*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4555*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4556*4757b351SPierre Pronchery#endif 4557*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 4558*4757b351SPierre Pronchery st1 {v4.4s},[x1],#16 4559*4757b351SPierre Pronchery // save the last tweak 4560*4757b351SPierre Pronchery st1 {v8.4s},[x5] 4561*4757b351SPierre Pronchery b 100f 4562*4757b351SPierre Pronchery1: // process last 2 blocks 4563*4757b351SPierre Pronchery cmp x2,#2 4564*4757b351SPierre Pronchery b.gt 1f 4565*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s},[x0],#32 4566*4757b351SPierre Pronchery eor v4.16b, v4.16b, 
v8.16b 4567*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 4568*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4569*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4570*4757b351SPierre Pronchery#endif 4571*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4572*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 4573*4757b351SPierre Pronchery#endif 4574*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 4575*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 4576*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 4577*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 4578*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 4579*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 4580*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 4581*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 4582*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 4583*4757b351SPierre Pronchery zip1 v4.4s,v0.4s,v1.4s 4584*4757b351SPierre Pronchery zip2 v5.4s,v0.4s,v1.4s 4585*4757b351SPierre Pronchery zip1 v6.4s,v2.4s,v3.4s 4586*4757b351SPierre Pronchery zip2 v7.4s,v2.4s,v3.4s 4587*4757b351SPierre Pronchery zip1 v0.2d,v4.2d,v6.2d 4588*4757b351SPierre Pronchery zip2 v1.2d,v4.2d,v6.2d 4589*4757b351SPierre Pronchery zip1 v2.2d,v5.2d,v7.2d 4590*4757b351SPierre Pronchery zip2 v3.2d,v5.2d,v7.2d 4591*4757b351SPierre Pronchery eor v0.16b, v0.16b, v8.16b 4592*4757b351SPierre Pronchery eor v1.16b, v1.16b, v9.16b 4593*4757b351SPierre Pronchery st1 {v0.4s,v1.4s},[x1],#32 4594*4757b351SPierre Pronchery // save the last tweak 4595*4757b351SPierre Pronchery st1 {v9.4s},[x5] 4596*4757b351SPierre Pronchery b 100f 4597*4757b351SPierre Pronchery1: // process last 3 blocks 4598*4757b351SPierre Pronchery ld1 {v4.4s,v5.4s,v6.4s},[x0],#48 4599*4757b351SPierre Pronchery eor v4.16b, v4.16b, v8.16b 4600*4757b351SPierre Pronchery eor v5.16b, v5.16b, v9.16b 4601*4757b351SPierre Pronchery eor v6.16b, v6.16b, v10.16b 4602*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4603*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4604*4757b351SPierre 
Pronchery#endif 4605*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4606*4757b351SPierre Pronchery rev32 v5.16b,v5.16b 4607*4757b351SPierre Pronchery#endif 4608*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4609*4757b351SPierre Pronchery rev32 v6.16b,v6.16b 4610*4757b351SPierre Pronchery#endif 4611*4757b351SPierre Pronchery zip1 v0.4s,v4.4s,v5.4s 4612*4757b351SPierre Pronchery zip2 v1.4s,v4.4s,v5.4s 4613*4757b351SPierre Pronchery zip1 v2.4s,v6.4s,v7.4s 4614*4757b351SPierre Pronchery zip2 v3.4s,v6.4s,v7.4s 4615*4757b351SPierre Pronchery zip1 v4.2d,v0.2d,v2.2d 4616*4757b351SPierre Pronchery zip2 v5.2d,v0.2d,v2.2d 4617*4757b351SPierre Pronchery zip1 v6.2d,v1.2d,v3.2d 4618*4757b351SPierre Pronchery zip2 v7.2d,v1.2d,v3.2d 4619*4757b351SPierre Pronchery bl _vpsm4_enc_4blks 4620*4757b351SPierre Pronchery zip1 v4.4s,v0.4s,v1.4s 4621*4757b351SPierre Pronchery zip2 v5.4s,v0.4s,v1.4s 4622*4757b351SPierre Pronchery zip1 v6.4s,v2.4s,v3.4s 4623*4757b351SPierre Pronchery zip2 v7.4s,v2.4s,v3.4s 4624*4757b351SPierre Pronchery zip1 v0.2d,v4.2d,v6.2d 4625*4757b351SPierre Pronchery zip2 v1.2d,v4.2d,v6.2d 4626*4757b351SPierre Pronchery zip1 v2.2d,v5.2d,v7.2d 4627*4757b351SPierre Pronchery zip2 v3.2d,v5.2d,v7.2d 4628*4757b351SPierre Pronchery eor v0.16b, v0.16b, v8.16b 4629*4757b351SPierre Pronchery eor v1.16b, v1.16b, v9.16b 4630*4757b351SPierre Pronchery eor v2.16b, v2.16b, v10.16b 4631*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s},[x1],#48 4632*4757b351SPierre Pronchery // save the last tweak 4633*4757b351SPierre Pronchery st1 {v10.4s},[x5] 4634*4757b351SPierre Pronchery100: 4635*4757b351SPierre Pronchery cmp x29,0 4636*4757b351SPierre Pronchery b.eq .return 4637*4757b351SPierre Pronchery 4638*4757b351SPierre Pronchery// This branch calculates the last two tweaks, 4639*4757b351SPierre Pronchery// while the encryption/decryption length is larger than 32 4640*4757b351SPierre Pronchery.last_2blks_tweak: 4641*4757b351SPierre Pronchery ld1 {v8.4s},[x5] 4642*4757b351SPierre 
Pronchery#ifdef __AARCH64EB__ 4643*4757b351SPierre Pronchery rev32 v8.16b,v8.16b 4644*4757b351SPierre Pronchery#endif 4645*4757b351SPierre Pronchery mov v2.16b,v8.16b 4646*4757b351SPierre Pronchery adrp x10,.Lxts_magic 4647*4757b351SPierre Pronchery ldr q0, [x10, #:lo12:.Lxts_magic] 4648*4757b351SPierre Pronchery shl v9.16b, v2.16b, #1 4649*4757b351SPierre Pronchery ext v1.16b, v2.16b, v2.16b,#15 4650*4757b351SPierre Pronchery ushr v1.16b, v1.16b, #7 4651*4757b351SPierre Pronchery mul v1.16b, v1.16b, v0.16b 4652*4757b351SPierre Pronchery eor v9.16b, v9.16b, v1.16b 4653*4757b351SPierre Pronchery mov v2.16b,v9.16b 4654*4757b351SPierre Pronchery adrp x10,.Lxts_magic 4655*4757b351SPierre Pronchery ldr q0, [x10, #:lo12:.Lxts_magic] 4656*4757b351SPierre Pronchery shl v10.16b, v2.16b, #1 4657*4757b351SPierre Pronchery ext v1.16b, v2.16b, v2.16b,#15 4658*4757b351SPierre Pronchery ushr v1.16b, v1.16b, #7 4659*4757b351SPierre Pronchery mul v1.16b, v1.16b, v0.16b 4660*4757b351SPierre Pronchery eor v10.16b, v10.16b, v1.16b 4661*4757b351SPierre Pronchery b .check_dec 4662*4757b351SPierre Pronchery 4663*4757b351SPierre Pronchery 4664*4757b351SPierre Pronchery// This branch calculates the last two tweaks, 4665*4757b351SPierre Pronchery// while the encryption/decryption length is equal to 32, who only need two tweaks 4666*4757b351SPierre Pronchery.only_2blks_tweak: 4667*4757b351SPierre Pronchery mov v9.16b,v8.16b 4668*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4669*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 4670*4757b351SPierre Pronchery#endif 4671*4757b351SPierre Pronchery mov v2.16b,v9.16b 4672*4757b351SPierre Pronchery adrp x10,.Lxts_magic 4673*4757b351SPierre Pronchery ldr q0, [x10, #:lo12:.Lxts_magic] 4674*4757b351SPierre Pronchery shl v10.16b, v2.16b, #1 4675*4757b351SPierre Pronchery ext v1.16b, v2.16b, v2.16b,#15 4676*4757b351SPierre Pronchery ushr v1.16b, v1.16b, #7 4677*4757b351SPierre Pronchery mul v1.16b, v1.16b, v0.16b 4678*4757b351SPierre Pronchery eor v10.16b, 
v10.16b, v1.16b 4679*4757b351SPierre Pronchery b .check_dec 4680*4757b351SPierre Pronchery 4681*4757b351SPierre Pronchery 4682*4757b351SPierre Pronchery// Determine whether encryption or decryption is required. 4683*4757b351SPierre Pronchery// The last two tweaks need to be swapped for decryption. 4684*4757b351SPierre Pronchery.check_dec: 4685*4757b351SPierre Pronchery // encryption:1 decryption:0 4686*4757b351SPierre Pronchery cmp w28,1 4687*4757b351SPierre Pronchery b.eq .process_last_2blks 4688*4757b351SPierre Pronchery mov v0.16B,v9.16b 4689*4757b351SPierre Pronchery mov v9.16B,v10.16b 4690*4757b351SPierre Pronchery mov v10.16B,v0.16b 4691*4757b351SPierre Pronchery 4692*4757b351SPierre Pronchery.process_last_2blks: 4693*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4694*4757b351SPierre Pronchery rev32 v9.16b,v9.16b 4695*4757b351SPierre Pronchery#endif 4696*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 4697*4757b351SPierre Pronchery rev32 v10.16b,v10.16b 4698*4757b351SPierre Pronchery#endif 4699*4757b351SPierre Pronchery ld1 {v4.4s},[x0],#16 4700*4757b351SPierre Pronchery eor v4.16b, v4.16b, v9.16b 4701*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4702*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4703*4757b351SPierre Pronchery#endif 4704*4757b351SPierre Pronchery mov x10,x3 4705*4757b351SPierre Pronchery mov w11,#8 4706*4757b351SPierre Pronchery mov w12,v4.s[0] 4707*4757b351SPierre Pronchery mov w13,v4.s[1] 4708*4757b351SPierre Pronchery mov w14,v4.s[2] 4709*4757b351SPierre Pronchery mov w15,v4.s[3] 4710*4757b351SPierre Pronchery10: 4711*4757b351SPierre Pronchery ldp w7,w8,[x10],8 4712*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 4713*4757b351SPierre Pronchery eor w6,w14,w15 4714*4757b351SPierre Pronchery eor w9,w7,w13 4715*4757b351SPierre Pronchery eor w6,w6,w9 4716*4757b351SPierre Pronchery movi v1.16b,#64 4717*4757b351SPierre Pronchery movi v2.16b,#128 4718*4757b351SPierre Pronchery movi v3.16b,#192 4719*4757b351SPierre Pronchery mov 
v0.s[0],w6 4720*4757b351SPierre Pronchery 4721*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4722*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4723*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4724*4757b351SPierre Pronchery 4725*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4726*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4727*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4728*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4729*4757b351SPierre Pronchery 4730*4757b351SPierre Pronchery mov w6,v0.s[0] 4731*4757b351SPierre Pronchery mov w7,v1.s[0] 4732*4757b351SPierre Pronchery mov w9,v2.s[0] 4733*4757b351SPierre Pronchery add w7,w6,w7 4734*4757b351SPierre Pronchery mov w6,v3.s[0] 4735*4757b351SPierre Pronchery add w7,w7,w9 4736*4757b351SPierre Pronchery add w7,w7,w6 4737*4757b351SPierre Pronchery 4738*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4739*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4740*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4741*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4742*4757b351SPierre Pronchery eor w12,w12,w6 4743*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 4744*4757b351SPierre Pronchery eor w6,w14,w15 4745*4757b351SPierre Pronchery eor w9,w12,w8 4746*4757b351SPierre Pronchery eor w6,w6,w9 4747*4757b351SPierre Pronchery movi v1.16b,#64 4748*4757b351SPierre Pronchery movi v2.16b,#128 4749*4757b351SPierre Pronchery movi v3.16b,#192 4750*4757b351SPierre Pronchery mov v0.s[0],w6 4751*4757b351SPierre Pronchery 4752*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4753*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4754*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4755*4757b351SPierre Pronchery 4756*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4757*4757b351SPierre Pronchery tbl 
v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4758*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4759*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4760*4757b351SPierre Pronchery 4761*4757b351SPierre Pronchery mov w6,v0.s[0] 4762*4757b351SPierre Pronchery mov w7,v1.s[0] 4763*4757b351SPierre Pronchery mov w9,v2.s[0] 4764*4757b351SPierre Pronchery add w7,w6,w7 4765*4757b351SPierre Pronchery mov w6,v3.s[0] 4766*4757b351SPierre Pronchery add w7,w7,w9 4767*4757b351SPierre Pronchery add w7,w7,w6 4768*4757b351SPierre Pronchery 4769*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4770*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4771*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4772*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4773*4757b351SPierre Pronchery ldp w7,w8,[x10],8 4774*4757b351SPierre Pronchery eor w13,w13,w6 4775*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 4776*4757b351SPierre Pronchery eor w6,w12,w13 4777*4757b351SPierre Pronchery eor w9,w7,w15 4778*4757b351SPierre Pronchery eor w6,w6,w9 4779*4757b351SPierre Pronchery movi v1.16b,#64 4780*4757b351SPierre Pronchery movi v2.16b,#128 4781*4757b351SPierre Pronchery movi v3.16b,#192 4782*4757b351SPierre Pronchery mov v0.s[0],w6 4783*4757b351SPierre Pronchery 4784*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4785*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4786*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4787*4757b351SPierre Pronchery 4788*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4789*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4790*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4791*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4792*4757b351SPierre Pronchery 4793*4757b351SPierre Pronchery mov w6,v0.s[0] 4794*4757b351SPierre Pronchery mov w7,v1.s[0] 
4795*4757b351SPierre Pronchery mov w9,v2.s[0] 4796*4757b351SPierre Pronchery add w7,w6,w7 4797*4757b351SPierre Pronchery mov w6,v3.s[0] 4798*4757b351SPierre Pronchery add w7,w7,w9 4799*4757b351SPierre Pronchery add w7,w7,w6 4800*4757b351SPierre Pronchery 4801*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4802*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4803*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4804*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4805*4757b351SPierre Pronchery eor w14,w14,w6 4806*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 4807*4757b351SPierre Pronchery eor w6,w12,w13 4808*4757b351SPierre Pronchery eor w9,w14,w8 4809*4757b351SPierre Pronchery eor w6,w6,w9 4810*4757b351SPierre Pronchery movi v1.16b,#64 4811*4757b351SPierre Pronchery movi v2.16b,#128 4812*4757b351SPierre Pronchery movi v3.16b,#192 4813*4757b351SPierre Pronchery mov v0.s[0],w6 4814*4757b351SPierre Pronchery 4815*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4816*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4817*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4818*4757b351SPierre Pronchery 4819*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4820*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4821*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4822*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4823*4757b351SPierre Pronchery 4824*4757b351SPierre Pronchery mov w6,v0.s[0] 4825*4757b351SPierre Pronchery mov w7,v1.s[0] 4826*4757b351SPierre Pronchery mov w9,v2.s[0] 4827*4757b351SPierre Pronchery add w7,w6,w7 4828*4757b351SPierre Pronchery mov w6,v3.s[0] 4829*4757b351SPierre Pronchery add w7,w7,w9 4830*4757b351SPierre Pronchery add w7,w7,w6 4831*4757b351SPierre Pronchery 4832*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4833*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4834*4757b351SPierre Pronchery eor 
w6,w6,w7,ror #32-18 4835*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4836*4757b351SPierre Pronchery eor w15,w15,w6 4837*4757b351SPierre Pronchery subs w11,w11,#1 4838*4757b351SPierre Pronchery b.ne 10b 4839*4757b351SPierre Pronchery mov v4.s[0],w15 4840*4757b351SPierre Pronchery mov v4.s[1],w14 4841*4757b351SPierre Pronchery mov v4.s[2],w13 4842*4757b351SPierre Pronchery mov v4.s[3],w12 4843*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4844*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4845*4757b351SPierre Pronchery#endif 4846*4757b351SPierre Pronchery eor v4.16b, v4.16b, v9.16b 4847*4757b351SPierre Pronchery st1 {v4.4s},[x1],#16 4848*4757b351SPierre Pronchery 4849*4757b351SPierre Pronchery sub x26,x1,16 4850*4757b351SPierre Pronchery.loop: 4851*4757b351SPierre Pronchery subs x29,x29,1 4852*4757b351SPierre Pronchery ldrb w7,[x26,x29] 4853*4757b351SPierre Pronchery ldrb w8,[x0,x29] 4854*4757b351SPierre Pronchery strb w8,[x26,x29] 4855*4757b351SPierre Pronchery strb w7,[x1,x29] 4856*4757b351SPierre Pronchery b.gt .loop 4857*4757b351SPierre Pronchery ld1 {v4.4s}, [x26] 4858*4757b351SPierre Pronchery eor v4.16b, v4.16b, v10.16b 4859*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 4860*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 4861*4757b351SPierre Pronchery#endif 4862*4757b351SPierre Pronchery mov x10,x3 4863*4757b351SPierre Pronchery mov w11,#8 4864*4757b351SPierre Pronchery mov w12,v4.s[0] 4865*4757b351SPierre Pronchery mov w13,v4.s[1] 4866*4757b351SPierre Pronchery mov w14,v4.s[2] 4867*4757b351SPierre Pronchery mov w15,v4.s[3] 4868*4757b351SPierre Pronchery10: 4869*4757b351SPierre Pronchery ldp w7,w8,[x10],8 4870*4757b351SPierre Pronchery // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) 4871*4757b351SPierre Pronchery eor w6,w14,w15 4872*4757b351SPierre Pronchery eor w9,w7,w13 4873*4757b351SPierre Pronchery eor w6,w6,w9 4874*4757b351SPierre Pronchery movi v1.16b,#64 4875*4757b351SPierre Pronchery movi v2.16b,#128 4876*4757b351SPierre Pronchery movi v3.16b,#192 
4877*4757b351SPierre Pronchery mov v0.s[0],w6 4878*4757b351SPierre Pronchery 4879*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4880*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4881*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4882*4757b351SPierre Pronchery 4883*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4884*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4885*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4886*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4887*4757b351SPierre Pronchery 4888*4757b351SPierre Pronchery mov w6,v0.s[0] 4889*4757b351SPierre Pronchery mov w7,v1.s[0] 4890*4757b351SPierre Pronchery mov w9,v2.s[0] 4891*4757b351SPierre Pronchery add w7,w6,w7 4892*4757b351SPierre Pronchery mov w6,v3.s[0] 4893*4757b351SPierre Pronchery add w7,w7,w9 4894*4757b351SPierre Pronchery add w7,w7,w6 4895*4757b351SPierre Pronchery 4896*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4897*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4898*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4899*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4900*4757b351SPierre Pronchery eor w12,w12,w6 4901*4757b351SPierre Pronchery // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) 4902*4757b351SPierre Pronchery eor w6,w14,w15 4903*4757b351SPierre Pronchery eor w9,w12,w8 4904*4757b351SPierre Pronchery eor w6,w6,w9 4905*4757b351SPierre Pronchery movi v1.16b,#64 4906*4757b351SPierre Pronchery movi v2.16b,#128 4907*4757b351SPierre Pronchery movi v3.16b,#192 4908*4757b351SPierre Pronchery mov v0.s[0],w6 4909*4757b351SPierre Pronchery 4910*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4911*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4912*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4913*4757b351SPierre Pronchery 4914*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4915*4757b351SPierre Pronchery tbl 
v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4916*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4917*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4918*4757b351SPierre Pronchery 4919*4757b351SPierre Pronchery mov w6,v0.s[0] 4920*4757b351SPierre Pronchery mov w7,v1.s[0] 4921*4757b351SPierre Pronchery mov w9,v2.s[0] 4922*4757b351SPierre Pronchery add w7,w6,w7 4923*4757b351SPierre Pronchery mov w6,v3.s[0] 4924*4757b351SPierre Pronchery add w7,w7,w9 4925*4757b351SPierre Pronchery add w7,w7,w6 4926*4757b351SPierre Pronchery 4927*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4928*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4929*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4930*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4931*4757b351SPierre Pronchery ldp w7,w8,[x10],8 4932*4757b351SPierre Pronchery eor w13,w13,w6 4933*4757b351SPierre Pronchery // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) 4934*4757b351SPierre Pronchery eor w6,w12,w13 4935*4757b351SPierre Pronchery eor w9,w7,w15 4936*4757b351SPierre Pronchery eor w6,w6,w9 4937*4757b351SPierre Pronchery movi v1.16b,#64 4938*4757b351SPierre Pronchery movi v2.16b,#128 4939*4757b351SPierre Pronchery movi v3.16b,#192 4940*4757b351SPierre Pronchery mov v0.s[0],w6 4941*4757b351SPierre Pronchery 4942*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4943*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4944*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4945*4757b351SPierre Pronchery 4946*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4947*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4948*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4949*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4950*4757b351SPierre Pronchery 4951*4757b351SPierre Pronchery mov w6,v0.s[0] 4952*4757b351SPierre Pronchery mov w7,v1.s[0] 
4953*4757b351SPierre Pronchery mov w9,v2.s[0] 4954*4757b351SPierre Pronchery add w7,w6,w7 4955*4757b351SPierre Pronchery mov w6,v3.s[0] 4956*4757b351SPierre Pronchery add w7,w7,w9 4957*4757b351SPierre Pronchery add w7,w7,w6 4958*4757b351SPierre Pronchery 4959*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4960*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4961*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-18 4962*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4963*4757b351SPierre Pronchery eor w14,w14,w6 4964*4757b351SPierre Pronchery // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) 4965*4757b351SPierre Pronchery eor w6,w12,w13 4966*4757b351SPierre Pronchery eor w9,w14,w8 4967*4757b351SPierre Pronchery eor w6,w6,w9 4968*4757b351SPierre Pronchery movi v1.16b,#64 4969*4757b351SPierre Pronchery movi v2.16b,#128 4970*4757b351SPierre Pronchery movi v3.16b,#192 4971*4757b351SPierre Pronchery mov v0.s[0],w6 4972*4757b351SPierre Pronchery 4973*4757b351SPierre Pronchery sub v1.16b,v0.16b,v1.16b 4974*4757b351SPierre Pronchery sub v2.16b,v0.16b,v2.16b 4975*4757b351SPierre Pronchery sub v3.16b,v0.16b,v3.16b 4976*4757b351SPierre Pronchery 4977*4757b351SPierre Pronchery tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b 4978*4757b351SPierre Pronchery tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b 4979*4757b351SPierre Pronchery tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b 4980*4757b351SPierre Pronchery tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b 4981*4757b351SPierre Pronchery 4982*4757b351SPierre Pronchery mov w6,v0.s[0] 4983*4757b351SPierre Pronchery mov w7,v1.s[0] 4984*4757b351SPierre Pronchery mov w9,v2.s[0] 4985*4757b351SPierre Pronchery add w7,w6,w7 4986*4757b351SPierre Pronchery mov w6,v3.s[0] 4987*4757b351SPierre Pronchery add w7,w7,w9 4988*4757b351SPierre Pronchery add w7,w7,w6 4989*4757b351SPierre Pronchery 4990*4757b351SPierre Pronchery eor w6,w7,w7,ror #32-2 4991*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-10 4992*4757b351SPierre Pronchery eor 
w6,w6,w7,ror #32-18 4993*4757b351SPierre Pronchery eor w6,w6,w7,ror #32-24 4994*4757b351SPierre Pronchery eor w15,w15,w6 4995*4757b351SPierre Pronchery subs w11,w11,#1 4996*4757b351SPierre Pronchery b.ne 10b 4997*4757b351SPierre Pronchery mov v4.s[0],w15 4998*4757b351SPierre Pronchery mov v4.s[1],w14 4999*4757b351SPierre Pronchery mov v4.s[2],w13 5000*4757b351SPierre Pronchery mov v4.s[3],w12 5001*4757b351SPierre Pronchery#ifndef __AARCH64EB__ 5002*4757b351SPierre Pronchery rev32 v4.16b,v4.16b 5003*4757b351SPierre Pronchery#endif 5004*4757b351SPierre Pronchery eor v4.16b, v4.16b, v10.16b 5005*4757b351SPierre Pronchery st1 {v4.4s}, [x26] 5006*4757b351SPierre Pronchery.return: 5007*4757b351SPierre Pronchery ldp d14, d15, [sp], #0x10 5008*4757b351SPierre Pronchery ldp d12, d13, [sp], #0x10 5009*4757b351SPierre Pronchery ldp d10, d11, [sp], #0x10 5010*4757b351SPierre Pronchery ldp d8, d9, [sp], #0x10 5011*4757b351SPierre Pronchery ldp x29, x30, [sp], #0x10 5012*4757b351SPierre Pronchery ldp x27, x28, [sp], #0x10 5013*4757b351SPierre Pronchery ldp x25, x26, [sp], #0x10 5014*4757b351SPierre Pronchery ldp x23, x24, [sp], #0x10 5015*4757b351SPierre Pronchery ldp x21, x22, [sp], #0x10 5016*4757b351SPierre Pronchery ldp x19, x20, [sp], #0x10 5017*4757b351SPierre Pronchery ldp x17, x18, [sp], #0x10 5018*4757b351SPierre Pronchery ldp x15, x16, [sp], #0x10 5019*4757b351SPierre Pronchery AARCH64_VALIDATE_LINK_REGISTER 5020*4757b351SPierre Pronchery ret 5021*4757b351SPierre Pronchery.size vpsm4_xts_encrypt,.-vpsm4_xts_encrypt 5022