/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)	SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)	SYM_FUNC_END(neon_ ## func)

	/*
	 * Fixed register roles used throughout this file:
	 *   v12       - #0x1b splat: GF(2^8) reduction term for mul_by_x[2]
	 *   v13       - ShiftRows permutation (loaded by 'prepare')
	 *   v14       - .Lror32by8 byte rotation table (loaded by 'prepare')
	 *   v15       - current round key, or #0x40 index bias for tbx
	 *   v16-v31   - the full 256-byte Sbox (loaded by 'prepare')
	 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7		/* broadcast sign bit (bit 7) */
	shl		\out, \in, #1
	and		\temp, \temp, \const	/* mask with 0x1b reduction term */
	eor		\out, \out, \temp
	.endm

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6		/* top two bits select reduction */
	shl		\out, \in, #2
	pmul		\temp, \temp, \const	/* carry-less multiply by 0x1b */
	eor		\out, \out, \temp
	.endm

	/* preload the entire Sbox into v16-v31, constants into v12-v14 */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing - the Sbox in v16-v31 is key independent */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

	/*
	 * Apply the SubBytes transformation using the preloaded Sbox.
	 * tbl covers indices 0-63; each tbx handles the next 64-entry
	 * window after rebasing the index by 0x40 (held in v15).
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation (inverse variant when \enc == 0) */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	/*
	 * Run one full AES block through \rounds rounds.
	 * \rk = round key array, \rkp/\i = scratch (pointer / counter).
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
	sub_bytes	\in
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns	\in, \enc
	b		.La\@
.Lb\@:	eor		\in\().16b, \in\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

122*4b908403SEric Biggers /* 123*4b908403SEric Biggers * Interleaved versions: functionally equivalent to the 124*4b908403SEric Biggers * ones above, but applied to AES states in parallel. 125*4b908403SEric Biggers */ 126*4b908403SEric Biggers 127*4b908403SEric Biggers .macro sub_bytes_4x, in0, in1, in2, in3 128*4b908403SEric Biggers sub v8.16b, \in0\().16b, v15.16b 129*4b908403SEric Biggers tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b 130*4b908403SEric Biggers sub v9.16b, \in1\().16b, v15.16b 131*4b908403SEric Biggers tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b 132*4b908403SEric Biggers sub v10.16b, \in2\().16b, v15.16b 133*4b908403SEric Biggers tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b 134*4b908403SEric Biggers sub v11.16b, \in3\().16b, v15.16b 135*4b908403SEric Biggers tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b 136*4b908403SEric Biggers tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b 137*4b908403SEric Biggers tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b 138*4b908403SEric Biggers sub v8.16b, v8.16b, v15.16b 139*4b908403SEric Biggers tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b 140*4b908403SEric Biggers sub v9.16b, v9.16b, v15.16b 141*4b908403SEric Biggers tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b 142*4b908403SEric Biggers sub v10.16b, v10.16b, v15.16b 143*4b908403SEric Biggers tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b 144*4b908403SEric Biggers sub v11.16b, v11.16b, v15.16b 145*4b908403SEric Biggers tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b 146*4b908403SEric Biggers sub v8.16b, v8.16b, v15.16b 147*4b908403SEric Biggers tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b 148*4b908403SEric Biggers sub v9.16b, v9.16b, v15.16b 149*4b908403SEric Biggers tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b 150*4b908403SEric Biggers sub v10.16b, v10.16b, v15.16b 151*4b908403SEric Biggers tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b 152*4b908403SEric Biggers sub v11.16b, v11.16b, v15.16b 153*4b908403SEric Biggers tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b 154*4b908403SEric 
Biggers tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b 155*4b908403SEric Biggers tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b 156*4b908403SEric Biggers .endm 157*4b908403SEric Biggers 158*4b908403SEric Biggers .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const 159*4b908403SEric Biggers sshr \tmp0\().16b, \in0\().16b, #7 160*4b908403SEric Biggers shl \out0\().16b, \in0\().16b, #1 161*4b908403SEric Biggers sshr \tmp1\().16b, \in1\().16b, #7 162*4b908403SEric Biggers and \tmp0\().16b, \tmp0\().16b, \const\().16b 163*4b908403SEric Biggers shl \out1\().16b, \in1\().16b, #1 164*4b908403SEric Biggers and \tmp1\().16b, \tmp1\().16b, \const\().16b 165*4b908403SEric Biggers eor \out0\().16b, \out0\().16b, \tmp0\().16b 166*4b908403SEric Biggers eor \out1\().16b, \out1\().16b, \tmp1\().16b 167*4b908403SEric Biggers .endm 168*4b908403SEric Biggers 169*4b908403SEric Biggers .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const 170*4b908403SEric Biggers ushr \tmp0\().16b, \in0\().16b, #6 171*4b908403SEric Biggers shl \out0\().16b, \in0\().16b, #2 172*4b908403SEric Biggers ushr \tmp1\().16b, \in1\().16b, #6 173*4b908403SEric Biggers pmul \tmp0\().16b, \tmp0\().16b, \const\().16b 174*4b908403SEric Biggers shl \out1\().16b, \in1\().16b, #2 175*4b908403SEric Biggers pmul \tmp1\().16b, \tmp1\().16b, \const\().16b 176*4b908403SEric Biggers eor \out0\().16b, \out0\().16b, \tmp0\().16b 177*4b908403SEric Biggers eor \out1\().16b, \out1\().16b, \tmp1\().16b 178*4b908403SEric Biggers .endm 179*4b908403SEric Biggers 180*4b908403SEric Biggers .macro mix_columns_2x, in0, in1, enc 181*4b908403SEric Biggers .if \enc == 0 182*4b908403SEric Biggers /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ 183*4b908403SEric Biggers mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 184*4b908403SEric Biggers eor \in0\().16b, \in0\().16b, v8.16b 185*4b908403SEric Biggers rev32 v8.8h, v8.8h 186*4b908403SEric Biggers eor \in1\().16b, \in1\().16b, v9.16b 187*4b908403SEric Biggers rev32 v9.8h, 
v9.8h 188*4b908403SEric Biggers eor \in0\().16b, \in0\().16b, v8.16b 189*4b908403SEric Biggers eor \in1\().16b, \in1\().16b, v9.16b 190*4b908403SEric Biggers .endif 191*4b908403SEric Biggers 192*4b908403SEric Biggers mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 193*4b908403SEric Biggers rev32 v10.8h, \in0\().8h 194*4b908403SEric Biggers rev32 v11.8h, \in1\().8h 195*4b908403SEric Biggers eor v10.16b, v10.16b, v8.16b 196*4b908403SEric Biggers eor v11.16b, v11.16b, v9.16b 197*4b908403SEric Biggers eor \in0\().16b, \in0\().16b, v10.16b 198*4b908403SEric Biggers eor \in1\().16b, \in1\().16b, v11.16b 199*4b908403SEric Biggers tbl \in0\().16b, {\in0\().16b}, v14.16b 200*4b908403SEric Biggers tbl \in1\().16b, {\in1\().16b}, v14.16b 201*4b908403SEric Biggers eor \in0\().16b, \in0\().16b, v10.16b 202*4b908403SEric Biggers eor \in1\().16b, \in1\().16b, v11.16b 203*4b908403SEric Biggers .endm 204*4b908403SEric Biggers 205*4b908403SEric Biggers .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i 206*4b908403SEric Biggers ld1 {v15.4s}, [\rk] 207*4b908403SEric Biggers add \rkp, \rk, #16 208*4b908403SEric Biggers mov \i, \rounds 209*4b908403SEric Biggers.La\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 210*4b908403SEric Biggers eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 211*4b908403SEric Biggers eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ 212*4b908403SEric Biggers eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 213*4b908403SEric Biggers movi v15.16b, #0x40 214*4b908403SEric Biggers tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ 215*4b908403SEric Biggers tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ 216*4b908403SEric Biggers tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ 217*4b908403SEric Biggers tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ 218*4b908403SEric Biggers sub_bytes_4x \in0, \in1, \in2, \in3 219*4b908403SEric Biggers sub \i, \i, #1 220*4b908403SEric Biggers ld1 {v15.4s}, [\rkp], 
#16 221*4b908403SEric Biggers cbz \i, .Lb\@ 222*4b908403SEric Biggers mix_columns_2x \in0, \in1, \enc 223*4b908403SEric Biggers mix_columns_2x \in2, \in3, \enc 224*4b908403SEric Biggers b .La\@ 225*4b908403SEric Biggers.Lb\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 226*4b908403SEric Biggers eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 227*4b908403SEric Biggers eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ 228*4b908403SEric Biggers eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 229*4b908403SEric Biggers .endm 230*4b908403SEric Biggers 231*4b908403SEric Biggers .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i 232*4b908403SEric Biggers do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i 233*4b908403SEric Biggers .endm 234*4b908403SEric Biggers 235*4b908403SEric Biggers .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i 236*4b908403SEric Biggers do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i 237*4b908403SEric Biggers .endm 238*4b908403SEric Biggers 239*4b908403SEric Biggers#include "aes-modes.S" 240*4b908403SEric Biggers 241*4b908403SEric Biggers .section ".rodata", "a" 242*4b908403SEric Biggers .align 4 243*4b908403SEric Biggers.LForward_ShiftRows: 244*4b908403SEric Biggers .octa 0x0b06010c07020d08030e09040f0a0500 245*4b908403SEric Biggers 246*4b908403SEric Biggers.LReverse_ShiftRows: 247*4b908403SEric Biggers .octa 0x0306090c0f0205080b0e0104070a0d00 248*4b908403SEric Biggers 249*4b908403SEric Biggers.Lror32by8: 250*4b908403SEric Biggers .octa 0x0c0f0e0d080b0a090407060500030201 251