/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Function entry/exit wrappers: every symbol emitted through them gets a
 * "neon_" prefix pasted in front of the name passed in.
 */
#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)

	/*
	 * Register aliases. Note that xtsmask and cbciv both name v7, so
	 * they cannot be live at the same time.
	 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4

	/*
	 * Re-establish the XTS mask by invoking xts_load_mask with \tmp.
	 * xts_load_mask is not defined in this part of the file;
	 * NOTE(review): presumably supplied by aes-modes.S - confirm.
	 */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	/* Branches to \lbl when bit 1 of \reg is set, falls through otherwise. */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/*
	 * multiply by polynomial 'x' in GF(2^8)
	 *
	 * \out   - destination vector (with arrangement specifier)
	 * \in    - source vector; may not be the same register as \temp
	 * \temp  - scratch vector, clobbered
	 * \const - reduction constant replicated in every lane (the callers
	 *          in this file pass v12, which prepare sets to 0x1b)
	 *
	 * With the .16b operands used by the callers in this file, the
	 * arithmetic shift by 7 turns each byte into 0xff when its top bit
	 * is set and 0x00 otherwise, so the AND selects \const only for the
	 * bytes whose top bit is shifted out by the shl.
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/*
	 * multiply by polynomial 'x^2' in GF(2^8)
	 *
	 * Same operands as mul_by_x. The logical shift by 6 isolates the two
	 * bits that the shl by 2 pushes out; their polynomial product with
	 * \const (pmul) is the correction folded back into \out.
	 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

4349788fe2SArd Biesheuvel /* preload the entire Sbox */ 4449788fe2SArd Biesheuvel .macro prepare, sbox, shiftrows, temp 454edd7d01SArd Biesheuvel movi v12.16b, #0x1b 4662c24708SArd Biesheuvel ldr_l q13, \shiftrows, \temp 4762c24708SArd Biesheuvel ldr_l q14, .Lror32by8, \temp 4862c24708SArd Biesheuvel adr_l \temp, \sbox 4949788fe2SArd Biesheuvel ld1 {v16.16b-v19.16b}, [\temp], #64 5049788fe2SArd Biesheuvel ld1 {v20.16b-v23.16b}, [\temp], #64 5149788fe2SArd Biesheuvel ld1 {v24.16b-v27.16b}, [\temp], #64 5249788fe2SArd Biesheuvel ld1 {v28.16b-v31.16b}, [\temp] 5349788fe2SArd Biesheuvel .endm 5449788fe2SArd Biesheuvel 5549788fe2SArd Biesheuvel /* do preload for encryption */ 5649788fe2SArd Biesheuvel .macro enc_prepare, ignore0, ignore1, temp 5758144b8dSArd Biesheuvel prepare crypto_aes_sbox, .LForward_ShiftRows, \temp 5849788fe2SArd Biesheuvel .endm 5949788fe2SArd Biesheuvel 6049788fe2SArd Biesheuvel .macro enc_switch_key, ignore0, ignore1, temp 6149788fe2SArd Biesheuvel /* do nothing */ 6249788fe2SArd Biesheuvel .endm 6349788fe2SArd Biesheuvel 6449788fe2SArd Biesheuvel /* do preload for decryption */ 6549788fe2SArd Biesheuvel .macro dec_prepare, ignore0, ignore1, temp 6658144b8dSArd Biesheuvel prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp 6749788fe2SArd Biesheuvel .endm 6849788fe2SArd Biesheuvel 691b069597SJilin Yuan /* apply SubBytes transformation using the preloaded Sbox */ 7049788fe2SArd Biesheuvel .macro sub_bytes, in 714edd7d01SArd Biesheuvel sub v9.16b, \in\().16b, v15.16b 7249788fe2SArd Biesheuvel tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b 734edd7d01SArd Biesheuvel sub v10.16b, v9.16b, v15.16b 7449788fe2SArd Biesheuvel tbx \in\().16b, {v20.16b-v23.16b}, v9.16b 754edd7d01SArd Biesheuvel sub v11.16b, v10.16b, v15.16b 7649788fe2SArd Biesheuvel tbx \in\().16b, {v24.16b-v27.16b}, v10.16b 7749788fe2SArd Biesheuvel tbx \in\().16b, {v28.16b-v31.16b}, v11.16b 7849788fe2SArd Biesheuvel .endm 7949788fe2SArd Biesheuvel 8049788fe2SArd Biesheuvel /* apply 
MixColumns transformation */ 814edd7d01SArd Biesheuvel .macro mix_columns, in, enc 824edd7d01SArd Biesheuvel .if \enc == 0 8349788fe2SArd Biesheuvel /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ 844edd7d01SArd Biesheuvel mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b 854edd7d01SArd Biesheuvel eor \in\().16b, \in\().16b, v8.16b 864edd7d01SArd Biesheuvel rev32 v8.8h, v8.8h 874edd7d01SArd Biesheuvel eor \in\().16b, \in\().16b, v8.16b 884edd7d01SArd Biesheuvel .endif 894edd7d01SArd Biesheuvel 904edd7d01SArd Biesheuvel mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b 914edd7d01SArd Biesheuvel rev32 v8.8h, \in\().8h 924edd7d01SArd Biesheuvel eor v8.16b, v8.16b, v9.16b 934edd7d01SArd Biesheuvel eor \in\().16b, \in\().16b, v8.16b 944edd7d01SArd Biesheuvel tbl \in\().16b, {\in\().16b}, v14.16b 954edd7d01SArd Biesheuvel eor \in\().16b, \in\().16b, v8.16b 9649788fe2SArd Biesheuvel .endm 9749788fe2SArd Biesheuvel 9849788fe2SArd Biesheuvel .macro do_block, enc, in, rounds, rk, rkp, i 99a2c435ccSArd Biesheuvel ld1 {v15.4s}, [\rk] 10049788fe2SArd Biesheuvel add \rkp, \rk, #16 10149788fe2SArd Biesheuvel mov \i, \rounds 102*571e557cSArd Biesheuvel.La\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ 1034edd7d01SArd Biesheuvel movi v15.16b, #0x40 10449788fe2SArd Biesheuvel tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ 10549788fe2SArd Biesheuvel sub_bytes \in 106*571e557cSArd Biesheuvel sub \i, \i, #1 1074edd7d01SArd Biesheuvel ld1 {v15.4s}, [\rkp], #16 108*571e557cSArd Biesheuvel cbz \i, .Lb\@ 1094edd7d01SArd Biesheuvel mix_columns \in, \enc 110*571e557cSArd Biesheuvel b .La\@ 111*571e557cSArd Biesheuvel.Lb\@: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ 11249788fe2SArd Biesheuvel .endm 11349788fe2SArd Biesheuvel 11449788fe2SArd Biesheuvel .macro encrypt_block, in, rounds, rk, rkp, i 11549788fe2SArd Biesheuvel do_block 1, \in, \rounds, \rk, \rkp, \i 11649788fe2SArd Biesheuvel .endm 11749788fe2SArd Biesheuvel 11849788fe2SArd Biesheuvel .macro 
decrypt_block, in, rounds, rk, rkp, i 11949788fe2SArd Biesheuvel do_block 0, \in, \rounds, \rk, \rkp, \i 12049788fe2SArd Biesheuvel .endm 12149788fe2SArd Biesheuvel 12249788fe2SArd Biesheuvel /* 12349788fe2SArd Biesheuvel * Interleaved versions: functionally equivalent to the 124e2174139SArd Biesheuvel * ones above, but applied to AES states in parallel. 12549788fe2SArd Biesheuvel */ 12649788fe2SArd Biesheuvel 12749788fe2SArd Biesheuvel .macro sub_bytes_4x, in0, in1, in2, in3 1284edd7d01SArd Biesheuvel sub v8.16b, \in0\().16b, v15.16b 12949788fe2SArd Biesheuvel tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b 1304edd7d01SArd Biesheuvel sub v9.16b, \in1\().16b, v15.16b 13149788fe2SArd Biesheuvel tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b 1324edd7d01SArd Biesheuvel sub v10.16b, \in2\().16b, v15.16b 13349788fe2SArd Biesheuvel tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b 1344edd7d01SArd Biesheuvel sub v11.16b, \in3\().16b, v15.16b 13549788fe2SArd Biesheuvel tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b 13649788fe2SArd Biesheuvel tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b 13749788fe2SArd Biesheuvel tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b 1384edd7d01SArd Biesheuvel sub v8.16b, v8.16b, v15.16b 13949788fe2SArd Biesheuvel tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b 1404edd7d01SArd Biesheuvel sub v9.16b, v9.16b, v15.16b 14149788fe2SArd Biesheuvel tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b 1424edd7d01SArd Biesheuvel sub v10.16b, v10.16b, v15.16b 14349788fe2SArd Biesheuvel tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b 1444edd7d01SArd Biesheuvel sub v11.16b, v11.16b, v15.16b 14549788fe2SArd Biesheuvel tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b 1464edd7d01SArd Biesheuvel sub v8.16b, v8.16b, v15.16b 14749788fe2SArd Biesheuvel tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b 1484edd7d01SArd Biesheuvel sub v9.16b, v9.16b, v15.16b 14949788fe2SArd Biesheuvel tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b 1504edd7d01SArd Biesheuvel sub v10.16b, v10.16b, v15.16b 15149788fe2SArd 
Biesheuvel tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b 1524edd7d01SArd Biesheuvel sub v11.16b, v11.16b, v15.16b 15349788fe2SArd Biesheuvel tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b 15449788fe2SArd Biesheuvel tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b 15549788fe2SArd Biesheuvel tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b 15649788fe2SArd Biesheuvel .endm 15749788fe2SArd Biesheuvel 15849788fe2SArd Biesheuvel .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const 15949788fe2SArd Biesheuvel sshr \tmp0\().16b, \in0\().16b, #7 1604edd7d01SArd Biesheuvel shl \out0\().16b, \in0\().16b, #1 16149788fe2SArd Biesheuvel sshr \tmp1\().16b, \in1\().16b, #7 16249788fe2SArd Biesheuvel and \tmp0\().16b, \tmp0\().16b, \const\().16b 1634edd7d01SArd Biesheuvel shl \out1\().16b, \in1\().16b, #1 16449788fe2SArd Biesheuvel and \tmp1\().16b, \tmp1\().16b, \const\().16b 16549788fe2SArd Biesheuvel eor \out0\().16b, \out0\().16b, \tmp0\().16b 16649788fe2SArd Biesheuvel eor \out1\().16b, \out1\().16b, \tmp1\().16b 16749788fe2SArd Biesheuvel .endm 16849788fe2SArd Biesheuvel 1694edd7d01SArd Biesheuvel .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const 1704edd7d01SArd Biesheuvel ushr \tmp0\().16b, \in0\().16b, #6 1714edd7d01SArd Biesheuvel shl \out0\().16b, \in0\().16b, #2 1724edd7d01SArd Biesheuvel ushr \tmp1\().16b, \in1\().16b, #6 1734edd7d01SArd Biesheuvel pmul \tmp0\().16b, \tmp0\().16b, \const\().16b 1744edd7d01SArd Biesheuvel shl \out1\().16b, \in1\().16b, #2 1754edd7d01SArd Biesheuvel pmul \tmp1\().16b, \tmp1\().16b, \const\().16b 1764edd7d01SArd Biesheuvel eor \out0\().16b, \out0\().16b, \tmp0\().16b 1774edd7d01SArd Biesheuvel eor \out1\().16b, \out1\().16b, \tmp1\().16b 1784edd7d01SArd Biesheuvel .endm 1794edd7d01SArd Biesheuvel 1804edd7d01SArd Biesheuvel .macro mix_columns_2x, in0, in1, enc 1814edd7d01SArd Biesheuvel .if \enc == 0 1824edd7d01SArd Biesheuvel /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ 1834edd7d01SArd Biesheuvel mul_by_x2_2x v8, v9, \in0, 
\in1, v10, v11, v12 1844edd7d01SArd Biesheuvel eor \in0\().16b, \in0\().16b, v8.16b 1854edd7d01SArd Biesheuvel rev32 v8.8h, v8.8h 1864edd7d01SArd Biesheuvel eor \in1\().16b, \in1\().16b, v9.16b 1874edd7d01SArd Biesheuvel rev32 v9.8h, v9.8h 1884edd7d01SArd Biesheuvel eor \in0\().16b, \in0\().16b, v8.16b 1894edd7d01SArd Biesheuvel eor \in1\().16b, \in1\().16b, v9.16b 1904edd7d01SArd Biesheuvel .endif 1914edd7d01SArd Biesheuvel 1924edd7d01SArd Biesheuvel mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 19349788fe2SArd Biesheuvel rev32 v10.8h, \in0\().8h 19449788fe2SArd Biesheuvel rev32 v11.8h, \in1\().8h 1954edd7d01SArd Biesheuvel eor v10.16b, v10.16b, v8.16b 1964edd7d01SArd Biesheuvel eor v11.16b, v11.16b, v9.16b 1974edd7d01SArd Biesheuvel eor \in0\().16b, \in0\().16b, v10.16b 1984edd7d01SArd Biesheuvel eor \in1\().16b, \in1\().16b, v11.16b 1994edd7d01SArd Biesheuvel tbl \in0\().16b, {\in0\().16b}, v14.16b 2004edd7d01SArd Biesheuvel tbl \in1\().16b, {\in1\().16b}, v14.16b 2014edd7d01SArd Biesheuvel eor \in0\().16b, \in0\().16b, v10.16b 2024edd7d01SArd Biesheuvel eor \in1\().16b, \in1\().16b, v11.16b 20349788fe2SArd Biesheuvel .endm 20449788fe2SArd Biesheuvel 20549788fe2SArd Biesheuvel .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i 206a2c435ccSArd Biesheuvel ld1 {v15.4s}, [\rk] 20749788fe2SArd Biesheuvel add \rkp, \rk, #16 20849788fe2SArd Biesheuvel mov \i, \rounds 209*571e557cSArd Biesheuvel.La\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 21049788fe2SArd Biesheuvel eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 21149788fe2SArd Biesheuvel eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ 21249788fe2SArd Biesheuvel eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 2134edd7d01SArd Biesheuvel movi v15.16b, #0x40 21449788fe2SArd Biesheuvel tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ 21549788fe2SArd Biesheuvel tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ 21649788fe2SArd Biesheuvel tbl \in2\().16b, 
{\in2\().16b}, v13.16b /* ShiftRows */ 21749788fe2SArd Biesheuvel tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ 2184edd7d01SArd Biesheuvel sub_bytes_4x \in0, \in1, \in2, \in3 219*571e557cSArd Biesheuvel sub \i, \i, #1 2204edd7d01SArd Biesheuvel ld1 {v15.4s}, [\rkp], #16 221*571e557cSArd Biesheuvel cbz \i, .Lb\@ 2224edd7d01SArd Biesheuvel mix_columns_2x \in0, \in1, \enc 2234edd7d01SArd Biesheuvel mix_columns_2x \in2, \in3, \enc 224*571e557cSArd Biesheuvel b .La\@ 225*571e557cSArd Biesheuvel.Lb\@: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ 22649788fe2SArd Biesheuvel eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ 22749788fe2SArd Biesheuvel eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ 22849788fe2SArd Biesheuvel eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ 22949788fe2SArd Biesheuvel .endm 23049788fe2SArd Biesheuvel 23149788fe2SArd Biesheuvel .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i 23249788fe2SArd Biesheuvel do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i 23349788fe2SArd Biesheuvel .endm 23449788fe2SArd Biesheuvel 23549788fe2SArd Biesheuvel .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i 23649788fe2SArd Biesheuvel do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i 23749788fe2SArd Biesheuvel .endm 23849788fe2SArd Biesheuvel 23949788fe2SArd Biesheuvel#include "aes-modes.S" 24049788fe2SArd Biesheuvel 24162c24708SArd Biesheuvel .section ".rodata", "a" 24258144b8dSArd Biesheuvel .align 4 2434edd7d01SArd Biesheuvel.LForward_ShiftRows: 2444edd7d01SArd Biesheuvel .octa 0x0b06010c07020d08030e09040f0a0500 2454edd7d01SArd Biesheuvel 2464edd7d01SArd Biesheuvel.LReverse_ShiftRows: 2474edd7d01SArd Biesheuvel .octa 0x0306090c0f0205080b0e0104070a0d00 2484edd7d01SArd Biesheuvel 2494edd7d01SArd Biesheuvel.Lror32by8: 2504edd7d01SArd Biesheuvel .octa 0x0c0f0e0d080b0a090407060500030201 251