1d2912cb1SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-only */ 21abee99eSArd Biesheuvel/* 31abee99eSArd Biesheuvel * Bit sliced AES using NEON instructions 41abee99eSArd Biesheuvel * 51abee99eSArd Biesheuvel * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> 61abee99eSArd Biesheuvel */ 71abee99eSArd Biesheuvel 81abee99eSArd Biesheuvel/* 91abee99eSArd Biesheuvel * The algorithm implemented here is described in detail by the paper 101abee99eSArd Biesheuvel * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and 111abee99eSArd Biesheuvel * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf) 121abee99eSArd Biesheuvel * 131abee99eSArd Biesheuvel * This implementation is based primarily on the OpenSSL implementation 141abee99eSArd Biesheuvel * for 32-bit ARM written by Andy Polyakov <appro@openssl.org> 151abee99eSArd Biesheuvel */ 161abee99eSArd Biesheuvel 171abee99eSArd Biesheuvel#include <linux/linkage.h> 18*47446d7cSEric Biggers#include <linux/cfi_types.h> 191abee99eSArd Biesheuvel#include <asm/assembler.h> 201abee99eSArd Biesheuvel 211abee99eSArd Biesheuvel .text 221abee99eSArd Biesheuvel 231abee99eSArd Biesheuvel rounds .req x11 241abee99eSArd Biesheuvel bskey .req x12 251abee99eSArd Biesheuvel 261abee99eSArd Biesheuvel .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7 271abee99eSArd Biesheuvel eor \b2, \b2, \b1 281abee99eSArd Biesheuvel eor \b5, \b5, \b6 291abee99eSArd Biesheuvel eor \b3, \b3, \b0 301abee99eSArd Biesheuvel eor \b6, \b6, \b2 311abee99eSArd Biesheuvel eor \b5, \b5, \b0 321abee99eSArd Biesheuvel eor \b6, \b6, \b3 331abee99eSArd Biesheuvel eor \b3, \b3, \b7 341abee99eSArd Biesheuvel eor \b7, \b7, \b5 351abee99eSArd Biesheuvel eor \b3, \b3, \b4 361abee99eSArd Biesheuvel eor \b4, \b4, \b5 371abee99eSArd Biesheuvel eor \b2, \b2, \b7 381abee99eSArd Biesheuvel eor \b3, \b3, \b1 391abee99eSArd Biesheuvel eor \b1, \b1, \b5 401abee99eSArd Biesheuvel .endm 411abee99eSArd Biesheuvel 421abee99eSArd Biesheuvel .macro out_bs_ch, b0, b1, 
b2, b3, b4, b5, b6, b7 431abee99eSArd Biesheuvel eor \b0, \b0, \b6 441abee99eSArd Biesheuvel eor \b1, \b1, \b4 451abee99eSArd Biesheuvel eor \b4, \b4, \b6 461abee99eSArd Biesheuvel eor \b2, \b2, \b0 471abee99eSArd Biesheuvel eor \b6, \b6, \b1 481abee99eSArd Biesheuvel eor \b1, \b1, \b5 491abee99eSArd Biesheuvel eor \b5, \b5, \b3 501abee99eSArd Biesheuvel eor \b3, \b3, \b7 511abee99eSArd Biesheuvel eor \b7, \b7, \b5 521abee99eSArd Biesheuvel eor \b2, \b2, \b5 531abee99eSArd Biesheuvel eor \b4, \b4, \b7 541abee99eSArd Biesheuvel .endm 551abee99eSArd Biesheuvel 561abee99eSArd Biesheuvel .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5 571abee99eSArd Biesheuvel eor \b1, \b1, \b7 581abee99eSArd Biesheuvel eor \b4, \b4, \b7 591abee99eSArd Biesheuvel eor \b7, \b7, \b5 601abee99eSArd Biesheuvel eor \b1, \b1, \b3 611abee99eSArd Biesheuvel eor \b2, \b2, \b5 621abee99eSArd Biesheuvel eor \b3, \b3, \b7 631abee99eSArd Biesheuvel eor \b6, \b6, \b1 641abee99eSArd Biesheuvel eor \b2, \b2, \b0 651abee99eSArd Biesheuvel eor \b5, \b5, \b3 661abee99eSArd Biesheuvel eor \b4, \b4, \b6 671abee99eSArd Biesheuvel eor \b0, \b0, \b6 681abee99eSArd Biesheuvel eor \b1, \b1, \b4 691abee99eSArd Biesheuvel .endm 701abee99eSArd Biesheuvel 711abee99eSArd Biesheuvel .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2 721abee99eSArd Biesheuvel eor \b1, \b1, \b5 731abee99eSArd Biesheuvel eor \b2, \b2, \b7 741abee99eSArd Biesheuvel eor \b3, \b3, \b1 751abee99eSArd Biesheuvel eor \b4, \b4, \b5 761abee99eSArd Biesheuvel eor \b7, \b7, \b5 771abee99eSArd Biesheuvel eor \b3, \b3, \b4 781abee99eSArd Biesheuvel eor \b5, \b5, \b0 791abee99eSArd Biesheuvel eor \b3, \b3, \b7 801abee99eSArd Biesheuvel eor \b6, \b6, \b2 811abee99eSArd Biesheuvel eor \b2, \b2, \b1 821abee99eSArd Biesheuvel eor \b6, \b6, \b3 831abee99eSArd Biesheuvel eor \b3, \b3, \b0 841abee99eSArd Biesheuvel eor \b5, \b5, \b6 851abee99eSArd Biesheuvel .endm 861abee99eSArd Biesheuvel 871abee99eSArd Biesheuvel .macro mul_gf4, x0, x1, y0, y1, t0, 
t1 881abee99eSArd Biesheuvel eor \t0, \y0, \y1 891abee99eSArd Biesheuvel and \t0, \t0, \x0 901abee99eSArd Biesheuvel eor \x0, \x0, \x1 911abee99eSArd Biesheuvel and \t1, \x1, \y0 921abee99eSArd Biesheuvel and \x0, \x0, \y1 931abee99eSArd Biesheuvel eor \x1, \t1, \t0 941abee99eSArd Biesheuvel eor \x0, \x0, \t1 951abee99eSArd Biesheuvel .endm 961abee99eSArd Biesheuvel 971abee99eSArd Biesheuvel .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1 981abee99eSArd Biesheuvel eor \t0, \y0, \y1 991abee99eSArd Biesheuvel eor \t1, \y2, \y3 1001abee99eSArd Biesheuvel and \t0, \t0, \x0 1011abee99eSArd Biesheuvel and \t1, \t1, \x2 1021abee99eSArd Biesheuvel eor \x0, \x0, \x1 1031abee99eSArd Biesheuvel eor \x2, \x2, \x3 1041abee99eSArd Biesheuvel and \x1, \x1, \y0 1051abee99eSArd Biesheuvel and \x3, \x3, \y2 1061abee99eSArd Biesheuvel and \x0, \x0, \y1 1071abee99eSArd Biesheuvel and \x2, \x2, \y3 1081abee99eSArd Biesheuvel eor \x1, \x1, \x0 1091abee99eSArd Biesheuvel eor \x2, \x2, \x3 1101abee99eSArd Biesheuvel eor \x0, \x0, \t0 1111abee99eSArd Biesheuvel eor \x3, \x3, \t1 1121abee99eSArd Biesheuvel .endm 1131abee99eSArd Biesheuvel 1141abee99eSArd Biesheuvel .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \ 1151abee99eSArd Biesheuvel y0, y1, y2, y3, t0, t1, t2, t3 1161abee99eSArd Biesheuvel eor \t0, \x0, \x2 1171abee99eSArd Biesheuvel eor \t1, \x1, \x3 1181abee99eSArd Biesheuvel mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3 1191abee99eSArd Biesheuvel eor \y0, \y0, \y2 1201abee99eSArd Biesheuvel eor \y1, \y1, \y3 1211abee99eSArd Biesheuvel mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2 1221abee99eSArd Biesheuvel eor \x0, \x0, \t0 1231abee99eSArd Biesheuvel eor \x2, \x2, \t0 1241abee99eSArd Biesheuvel eor \x1, \x1, \t1 1251abee99eSArd Biesheuvel eor \x3, \x3, \t1 1261abee99eSArd Biesheuvel eor \t0, \x4, \x6 1271abee99eSArd Biesheuvel eor \t1, \x5, \x7 1281abee99eSArd Biesheuvel mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2 1291abee99eSArd 
Biesheuvel eor \y0, \y0, \y2 1301abee99eSArd Biesheuvel eor \y1, \y1, \y3 1311abee99eSArd Biesheuvel mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3 1321abee99eSArd Biesheuvel eor \x4, \x4, \t0 1331abee99eSArd Biesheuvel eor \x6, \x6, \t0 1341abee99eSArd Biesheuvel eor \x5, \x5, \t1 1351abee99eSArd Biesheuvel eor \x7, \x7, \t1 1361abee99eSArd Biesheuvel .endm 1371abee99eSArd Biesheuvel 1381abee99eSArd Biesheuvel .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \ 1391abee99eSArd Biesheuvel t0, t1, t2, t3, s0, s1, s2, s3 1401abee99eSArd Biesheuvel eor \t3, \x4, \x6 1411abee99eSArd Biesheuvel eor \t0, \x5, \x7 1421abee99eSArd Biesheuvel eor \t1, \x1, \x3 1431abee99eSArd Biesheuvel eor \s1, \x7, \x6 1441abee99eSArd Biesheuvel eor \s0, \x0, \x2 1451abee99eSArd Biesheuvel eor \s3, \t3, \t0 1461abee99eSArd Biesheuvel orr \t2, \t0, \t1 1471abee99eSArd Biesheuvel and \s2, \t3, \s0 1481abee99eSArd Biesheuvel orr \t3, \t3, \s0 1491abee99eSArd Biesheuvel eor \s0, \s0, \t1 1501abee99eSArd Biesheuvel and \t0, \t0, \t1 1511abee99eSArd Biesheuvel eor \t1, \x3, \x2 1521abee99eSArd Biesheuvel and \s3, \s3, \s0 1531abee99eSArd Biesheuvel and \s1, \s1, \t1 1541abee99eSArd Biesheuvel eor \t1, \x4, \x5 1551abee99eSArd Biesheuvel eor \s0, \x1, \x0 1561abee99eSArd Biesheuvel eor \t3, \t3, \s1 1571abee99eSArd Biesheuvel eor \t2, \t2, \s1 1581abee99eSArd Biesheuvel and \s1, \t1, \s0 1591abee99eSArd Biesheuvel orr \t1, \t1, \s0 1601abee99eSArd Biesheuvel eor \t3, \t3, \s3 1611abee99eSArd Biesheuvel eor \t0, \t0, \s1 1621abee99eSArd Biesheuvel eor \t2, \t2, \s2 1631abee99eSArd Biesheuvel eor \t1, \t1, \s3 1641abee99eSArd Biesheuvel eor \t0, \t0, \s2 1651abee99eSArd Biesheuvel and \s0, \x7, \x3 1661abee99eSArd Biesheuvel eor \t1, \t1, \s2 1671abee99eSArd Biesheuvel and \s1, \x6, \x2 1681abee99eSArd Biesheuvel and \s2, \x5, \x1 1691abee99eSArd Biesheuvel orr \s3, \x4, \x0 1701abee99eSArd Biesheuvel eor \t3, \t3, \s0 1711abee99eSArd Biesheuvel eor \t1, \t1, \s2 1721abee99eSArd Biesheuvel eor \s0, \t0, \s3 
1731abee99eSArd Biesheuvel eor \t2, \t2, \s1 1741abee99eSArd Biesheuvel and \s2, \t3, \t1 1751abee99eSArd Biesheuvel eor \s1, \t2, \s2 1761abee99eSArd Biesheuvel eor \s3, \s0, \s2 1771abee99eSArd Biesheuvel bsl \s1, \t1, \s0 1781abee99eSArd Biesheuvel not \t0, \s0 1791abee99eSArd Biesheuvel bsl \s0, \s1, \s3 1801abee99eSArd Biesheuvel bsl \t0, \s1, \s3 1811abee99eSArd Biesheuvel bsl \s3, \t3, \t2 1821abee99eSArd Biesheuvel eor \t3, \t3, \t2 1831abee99eSArd Biesheuvel and \s2, \s0, \s3 1841abee99eSArd Biesheuvel eor \t1, \t1, \t0 1851abee99eSArd Biesheuvel eor \s2, \s2, \t3 1861abee99eSArd Biesheuvel mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ 1871abee99eSArd Biesheuvel \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 1881abee99eSArd Biesheuvel .endm 1891abee99eSArd Biesheuvel 1901abee99eSArd Biesheuvel .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ 1911abee99eSArd Biesheuvel t0, t1, t2, t3, s0, s1, s2, s3 1921abee99eSArd Biesheuvel in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ 1931abee99eSArd Biesheuvel \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b 1941abee99eSArd Biesheuvel inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \ 1951abee99eSArd Biesheuvel \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ 1961abee99eSArd Biesheuvel \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ 1971abee99eSArd Biesheuvel \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b 1981abee99eSArd Biesheuvel out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \ 1991abee99eSArd Biesheuvel \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b 2001abee99eSArd Biesheuvel .endm 2011abee99eSArd Biesheuvel 2021abee99eSArd Biesheuvel .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \ 2031abee99eSArd Biesheuvel t0, t1, t2, t3, s0, s1, s2, s3 2041abee99eSArd Biesheuvel inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \ 2051abee99eSArd Biesheuvel \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b 2061abee99eSArd Biesheuvel inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, 
\ 2071abee99eSArd Biesheuvel \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ 2081abee99eSArd Biesheuvel \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \ 2091abee99eSArd Biesheuvel \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b 2101abee99eSArd Biesheuvel inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \ 2111abee99eSArd Biesheuvel \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b 2121abee99eSArd Biesheuvel .endm 2131abee99eSArd Biesheuvel 2141abee99eSArd Biesheuvel .macro enc_next_rk 2151abee99eSArd Biesheuvel ldp q16, q17, [bskey], #128 2161abee99eSArd Biesheuvel ldp q18, q19, [bskey, #-96] 2171abee99eSArd Biesheuvel ldp q20, q21, [bskey, #-64] 2181abee99eSArd Biesheuvel ldp q22, q23, [bskey, #-32] 2191abee99eSArd Biesheuvel .endm 2201abee99eSArd Biesheuvel 2211abee99eSArd Biesheuvel .macro dec_next_rk 2221abee99eSArd Biesheuvel ldp q16, q17, [bskey, #-128]! 2231abee99eSArd Biesheuvel ldp q18, q19, [bskey, #32] 2241abee99eSArd Biesheuvel ldp q20, q21, [bskey, #64] 2251abee99eSArd Biesheuvel ldp q22, q23, [bskey, #96] 2261abee99eSArd Biesheuvel .endm 2271abee99eSArd Biesheuvel 2281abee99eSArd Biesheuvel .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7 2291abee99eSArd Biesheuvel eor \x0\().16b, \x0\().16b, v16.16b 2301abee99eSArd Biesheuvel eor \x1\().16b, \x1\().16b, v17.16b 2311abee99eSArd Biesheuvel eor \x2\().16b, \x2\().16b, v18.16b 2321abee99eSArd Biesheuvel eor \x3\().16b, \x3\().16b, v19.16b 2331abee99eSArd Biesheuvel eor \x4\().16b, \x4\().16b, v20.16b 2341abee99eSArd Biesheuvel eor \x5\().16b, \x5\().16b, v21.16b 2351abee99eSArd Biesheuvel eor \x6\().16b, \x6\().16b, v22.16b 2361abee99eSArd Biesheuvel eor \x7\().16b, \x7\().16b, v23.16b 2371abee99eSArd Biesheuvel .endm 2381abee99eSArd Biesheuvel 2391abee99eSArd Biesheuvel .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask 2401abee99eSArd Biesheuvel tbl \x0\().16b, {\x0\().16b}, \mask\().16b 2411abee99eSArd Biesheuvel tbl \x1\().16b, {\x1\().16b}, \mask\().16b 2421abee99eSArd Biesheuvel 
tbl \x2\().16b, {\x2\().16b}, \mask\().16b 2431abee99eSArd Biesheuvel tbl \x3\().16b, {\x3\().16b}, \mask\().16b 2441abee99eSArd Biesheuvel tbl \x4\().16b, {\x4\().16b}, \mask\().16b 2451abee99eSArd Biesheuvel tbl \x5\().16b, {\x5\().16b}, \mask\().16b 2461abee99eSArd Biesheuvel tbl \x6\().16b, {\x6\().16b}, \mask\().16b 2471abee99eSArd Biesheuvel tbl \x7\().16b, {\x7\().16b}, \mask\().16b 2481abee99eSArd Biesheuvel .endm 2491abee99eSArd Biesheuvel 2501abee99eSArd Biesheuvel .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ 2511abee99eSArd Biesheuvel t0, t1, t2, t3, t4, t5, t6, t7, inv 2521abee99eSArd Biesheuvel ext \t0\().16b, \x0\().16b, \x0\().16b, #12 2531abee99eSArd Biesheuvel ext \t1\().16b, \x1\().16b, \x1\().16b, #12 2541abee99eSArd Biesheuvel eor \x0\().16b, \x0\().16b, \t0\().16b 2551abee99eSArd Biesheuvel ext \t2\().16b, \x2\().16b, \x2\().16b, #12 2561abee99eSArd Biesheuvel eor \x1\().16b, \x1\().16b, \t1\().16b 2571abee99eSArd Biesheuvel ext \t3\().16b, \x3\().16b, \x3\().16b, #12 2581abee99eSArd Biesheuvel eor \x2\().16b, \x2\().16b, \t2\().16b 2591abee99eSArd Biesheuvel ext \t4\().16b, \x4\().16b, \x4\().16b, #12 2601abee99eSArd Biesheuvel eor \x3\().16b, \x3\().16b, \t3\().16b 2611abee99eSArd Biesheuvel ext \t5\().16b, \x5\().16b, \x5\().16b, #12 2621abee99eSArd Biesheuvel eor \x4\().16b, \x4\().16b, \t4\().16b 2631abee99eSArd Biesheuvel ext \t6\().16b, \x6\().16b, \x6\().16b, #12 2641abee99eSArd Biesheuvel eor \x5\().16b, \x5\().16b, \t5\().16b 2651abee99eSArd Biesheuvel ext \t7\().16b, \x7\().16b, \x7\().16b, #12 2661abee99eSArd Biesheuvel eor \x6\().16b, \x6\().16b, \t6\().16b 2671abee99eSArd Biesheuvel eor \t1\().16b, \t1\().16b, \x0\().16b 2681abee99eSArd Biesheuvel eor \x7\().16b, \x7\().16b, \t7\().16b 2691abee99eSArd Biesheuvel ext \x0\().16b, \x0\().16b, \x0\().16b, #8 2701abee99eSArd Biesheuvel eor \t2\().16b, \t2\().16b, \x1\().16b 2711abee99eSArd Biesheuvel eor \t0\().16b, \t0\().16b, \x7\().16b 2721abee99eSArd Biesheuvel eor 
\t1\().16b, \t1\().16b, \x7\().16b 2731abee99eSArd Biesheuvel ext \x1\().16b, \x1\().16b, \x1\().16b, #8 2741abee99eSArd Biesheuvel eor \t5\().16b, \t5\().16b, \x4\().16b 2751abee99eSArd Biesheuvel eor \x0\().16b, \x0\().16b, \t0\().16b 2761abee99eSArd Biesheuvel eor \t6\().16b, \t6\().16b, \x5\().16b 2771abee99eSArd Biesheuvel eor \x1\().16b, \x1\().16b, \t1\().16b 2781abee99eSArd Biesheuvel ext \t0\().16b, \x4\().16b, \x4\().16b, #8 2791abee99eSArd Biesheuvel eor \t4\().16b, \t4\().16b, \x3\().16b 2801abee99eSArd Biesheuvel ext \t1\().16b, \x5\().16b, \x5\().16b, #8 2811abee99eSArd Biesheuvel eor \t7\().16b, \t7\().16b, \x6\().16b 2821abee99eSArd Biesheuvel ext \x4\().16b, \x3\().16b, \x3\().16b, #8 2831abee99eSArd Biesheuvel eor \t3\().16b, \t3\().16b, \x2\().16b 2841abee99eSArd Biesheuvel ext \x5\().16b, \x7\().16b, \x7\().16b, #8 2851abee99eSArd Biesheuvel eor \t4\().16b, \t4\().16b, \x7\().16b 2861abee99eSArd Biesheuvel ext \x3\().16b, \x6\().16b, \x6\().16b, #8 2871abee99eSArd Biesheuvel eor \t3\().16b, \t3\().16b, \x7\().16b 2881abee99eSArd Biesheuvel ext \x6\().16b, \x2\().16b, \x2\().16b, #8 2891abee99eSArd Biesheuvel eor \x7\().16b, \t1\().16b, \t5\().16b 2901abee99eSArd Biesheuvel .ifb \inv 2911abee99eSArd Biesheuvel eor \x2\().16b, \t0\().16b, \t4\().16b 2921abee99eSArd Biesheuvel eor \x4\().16b, \x4\().16b, \t3\().16b 2931abee99eSArd Biesheuvel eor \x5\().16b, \x5\().16b, \t7\().16b 2941abee99eSArd Biesheuvel eor \x3\().16b, \x3\().16b, \t6\().16b 2951abee99eSArd Biesheuvel eor \x6\().16b, \x6\().16b, \t2\().16b 2961abee99eSArd Biesheuvel .else 2971abee99eSArd Biesheuvel eor \t3\().16b, \t3\().16b, \x4\().16b 2981abee99eSArd Biesheuvel eor \x5\().16b, \x5\().16b, \t7\().16b 2991abee99eSArd Biesheuvel eor \x2\().16b, \x3\().16b, \t6\().16b 3001abee99eSArd Biesheuvel eor \x3\().16b, \t0\().16b, \t4\().16b 3011abee99eSArd Biesheuvel eor \x4\().16b, \x6\().16b, \t2\().16b 3021abee99eSArd Biesheuvel mov \x6\().16b, \t3\().16b 3031abee99eSArd Biesheuvel 
.endif 3041abee99eSArd Biesheuvel .endm 3051abee99eSArd Biesheuvel 3061abee99eSArd Biesheuvel .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \ 3071abee99eSArd Biesheuvel t0, t1, t2, t3, t4, t5, t6, t7 3081abee99eSArd Biesheuvel ext \t0\().16b, \x0\().16b, \x0\().16b, #8 3091abee99eSArd Biesheuvel ext \t6\().16b, \x6\().16b, \x6\().16b, #8 3101abee99eSArd Biesheuvel ext \t7\().16b, \x7\().16b, \x7\().16b, #8 3111abee99eSArd Biesheuvel eor \t0\().16b, \t0\().16b, \x0\().16b 3121abee99eSArd Biesheuvel ext \t1\().16b, \x1\().16b, \x1\().16b, #8 3131abee99eSArd Biesheuvel eor \t6\().16b, \t6\().16b, \x6\().16b 3141abee99eSArd Biesheuvel ext \t2\().16b, \x2\().16b, \x2\().16b, #8 3151abee99eSArd Biesheuvel eor \t7\().16b, \t7\().16b, \x7\().16b 3161abee99eSArd Biesheuvel ext \t3\().16b, \x3\().16b, \x3\().16b, #8 3171abee99eSArd Biesheuvel eor \t1\().16b, \t1\().16b, \x1\().16b 3181abee99eSArd Biesheuvel ext \t4\().16b, \x4\().16b, \x4\().16b, #8 3191abee99eSArd Biesheuvel eor \t2\().16b, \t2\().16b, \x2\().16b 3201abee99eSArd Biesheuvel ext \t5\().16b, \x5\().16b, \x5\().16b, #8 3211abee99eSArd Biesheuvel eor \t3\().16b, \t3\().16b, \x3\().16b 3221abee99eSArd Biesheuvel eor \t4\().16b, \t4\().16b, \x4\().16b 3231abee99eSArd Biesheuvel eor \t5\().16b, \t5\().16b, \x5\().16b 3241abee99eSArd Biesheuvel eor \x0\().16b, \x0\().16b, \t6\().16b 3251abee99eSArd Biesheuvel eor \x1\().16b, \x1\().16b, \t6\().16b 3261abee99eSArd Biesheuvel eor \x2\().16b, \x2\().16b, \t0\().16b 3271abee99eSArd Biesheuvel eor \x4\().16b, \x4\().16b, \t2\().16b 3281abee99eSArd Biesheuvel eor \x3\().16b, \x3\().16b, \t1\().16b 3291abee99eSArd Biesheuvel eor \x1\().16b, \x1\().16b, \t7\().16b 3301abee99eSArd Biesheuvel eor \x2\().16b, \x2\().16b, \t7\().16b 3311abee99eSArd Biesheuvel eor \x4\().16b, \x4\().16b, \t6\().16b 3321abee99eSArd Biesheuvel eor \x5\().16b, \x5\().16b, \t3\().16b 3331abee99eSArd Biesheuvel eor \x3\().16b, \x3\().16b, \t6\().16b 3341abee99eSArd Biesheuvel eor \x6\().16b, 
\x6\().16b, \t4\().16b 3351abee99eSArd Biesheuvel eor \x4\().16b, \x4\().16b, \t7\().16b 3361abee99eSArd Biesheuvel eor \x5\().16b, \x5\().16b, \t7\().16b 3371abee99eSArd Biesheuvel eor \x7\().16b, \x7\().16b, \t5\().16b 3381abee99eSArd Biesheuvel mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \ 3391abee99eSArd Biesheuvel \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1 3401abee99eSArd Biesheuvel .endm 3411abee99eSArd Biesheuvel 3421abee99eSArd Biesheuvel .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1 3431abee99eSArd Biesheuvel ushr \t0\().2d, \b0\().2d, #\n 3441abee99eSArd Biesheuvel ushr \t1\().2d, \b1\().2d, #\n 3451abee99eSArd Biesheuvel eor \t0\().16b, \t0\().16b, \a0\().16b 3461abee99eSArd Biesheuvel eor \t1\().16b, \t1\().16b, \a1\().16b 3471abee99eSArd Biesheuvel and \t0\().16b, \t0\().16b, \mask\().16b 3481abee99eSArd Biesheuvel and \t1\().16b, \t1\().16b, \mask\().16b 3491abee99eSArd Biesheuvel eor \a0\().16b, \a0\().16b, \t0\().16b 3501abee99eSArd Biesheuvel shl \t0\().2d, \t0\().2d, #\n 3511abee99eSArd Biesheuvel eor \a1\().16b, \a1\().16b, \t1\().16b 3521abee99eSArd Biesheuvel shl \t1\().2d, \t1\().2d, #\n 3531abee99eSArd Biesheuvel eor \b0\().16b, \b0\().16b, \t0\().16b 3541abee99eSArd Biesheuvel eor \b1\().16b, \b1\().16b, \t1\().16b 3551abee99eSArd Biesheuvel .endm 3561abee99eSArd Biesheuvel 3571abee99eSArd Biesheuvel .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3 3581abee99eSArd Biesheuvel movi \t0\().16b, #0x55 3591abee99eSArd Biesheuvel movi \t1\().16b, #0x33 3601abee99eSArd Biesheuvel swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3 3611abee99eSArd Biesheuvel swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3 3621abee99eSArd Biesheuvel movi \t0\().16b, #0x0f 3631abee99eSArd Biesheuvel swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3 3641abee99eSArd Biesheuvel swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3 3651abee99eSArd Biesheuvel swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3 3661abee99eSArd Biesheuvel swapmove_2x \x2, \x6, 
\x3, \x7, 4, \t0, \t2, \t3 3671abee99eSArd Biesheuvel .endm 3681abee99eSArd Biesheuvel 3691abee99eSArd Biesheuvel 3701abee99eSArd Biesheuvel .align 6 3711abee99eSArd BiesheuvelM0: .octa 0x0004080c0105090d02060a0e03070b0f 3721abee99eSArd Biesheuvel 3731abee99eSArd BiesheuvelM0SR: .octa 0x0004080c05090d010a0e02060f03070b 3741abee99eSArd BiesheuvelSR: .octa 0x0f0e0d0c0a09080b0504070600030201 3751abee99eSArd BiesheuvelSRM0: .octa 0x01060b0c0207080d0304090e00050a0f 3761abee99eSArd Biesheuvel 3771abee99eSArd BiesheuvelM0ISR: .octa 0x0004080c0d0105090a0e0206070b0f03 3781abee99eSArd BiesheuvelISR: .octa 0x0f0e0d0c080b0a090504070602010003 3791abee99eSArd BiesheuvelISRM0: .octa 0x0306090c00070a0d01040b0e0205080f 3801abee99eSArd Biesheuvel 3811abee99eSArd Biesheuvel /* 3821abee99eSArd Biesheuvel * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds) 3831abee99eSArd Biesheuvel */ 3840e89640bSMark BrownSYM_FUNC_START(aesbs_convert_key) 3851abee99eSArd Biesheuvel ld1 {v7.4s}, [x1], #16 // load round 0 key 3861abee99eSArd Biesheuvel ld1 {v17.4s}, [x1], #16 // load round 1 key 3871abee99eSArd Biesheuvel 3881abee99eSArd Biesheuvel movi v8.16b, #0x01 // bit masks 3891abee99eSArd Biesheuvel movi v9.16b, #0x02 3901abee99eSArd Biesheuvel movi v10.16b, #0x04 3911abee99eSArd Biesheuvel movi v11.16b, #0x08 3921abee99eSArd Biesheuvel movi v12.16b, #0x10 3931abee99eSArd Biesheuvel movi v13.16b, #0x20 3941abee99eSArd Biesheuvel movi v14.16b, #0x40 3951abee99eSArd Biesheuvel movi v15.16b, #0x80 3961abee99eSArd Biesheuvel ldr q16, M0 3971abee99eSArd Biesheuvel 3981abee99eSArd Biesheuvel sub x2, x2, #1 3991abee99eSArd Biesheuvel str q7, [x0], #16 // save round 0 key 4001abee99eSArd Biesheuvel 4011abee99eSArd Biesheuvel.Lkey_loop: 4021abee99eSArd Biesheuvel tbl v7.16b ,{v17.16b}, v16.16b 4031abee99eSArd Biesheuvel ld1 {v17.4s}, [x1], #16 // load next round key 4041abee99eSArd Biesheuvel 4051abee99eSArd Biesheuvel cmtst v0.16b, v7.16b, v8.16b 4061abee99eSArd Biesheuvel cmtst v1.16b, 
v7.16b, v9.16b 4071abee99eSArd Biesheuvel cmtst v2.16b, v7.16b, v10.16b 4081abee99eSArd Biesheuvel cmtst v3.16b, v7.16b, v11.16b 4091abee99eSArd Biesheuvel cmtst v4.16b, v7.16b, v12.16b 4101abee99eSArd Biesheuvel cmtst v5.16b, v7.16b, v13.16b 4111abee99eSArd Biesheuvel cmtst v6.16b, v7.16b, v14.16b 4121abee99eSArd Biesheuvel cmtst v7.16b, v7.16b, v15.16b 4131abee99eSArd Biesheuvel not v0.16b, v0.16b 4141abee99eSArd Biesheuvel not v1.16b, v1.16b 4151abee99eSArd Biesheuvel not v5.16b, v5.16b 4161abee99eSArd Biesheuvel not v6.16b, v6.16b 4171abee99eSArd Biesheuvel 4181abee99eSArd Biesheuvel subs x2, x2, #1 4191abee99eSArd Biesheuvel stp q0, q1, [x0], #128 4201abee99eSArd Biesheuvel stp q2, q3, [x0, #-96] 4211abee99eSArd Biesheuvel stp q4, q5, [x0, #-64] 4221abee99eSArd Biesheuvel stp q6, q7, [x0, #-32] 4231abee99eSArd Biesheuvel b.ne .Lkey_loop 4241abee99eSArd Biesheuvel 4251abee99eSArd Biesheuvel movi v7.16b, #0x63 // compose .L63 4261abee99eSArd Biesheuvel eor v17.16b, v17.16b, v7.16b 4271abee99eSArd Biesheuvel str q17, [x0] 4281abee99eSArd Biesheuvel ret 4290e89640bSMark BrownSYM_FUNC_END(aesbs_convert_key) 4301abee99eSArd Biesheuvel 4311abee99eSArd Biesheuvel .align 4 4320e89640bSMark BrownSYM_FUNC_START_LOCAL(aesbs_encrypt8) 4331abee99eSArd Biesheuvel ldr q9, [bskey], #16 // round 0 key 4341abee99eSArd Biesheuvel ldr q8, M0SR 4351abee99eSArd Biesheuvel ldr q24, SR 4361abee99eSArd Biesheuvel 4371abee99eSArd Biesheuvel eor v10.16b, v0.16b, v9.16b // xor with round0 key 4381abee99eSArd Biesheuvel eor v11.16b, v1.16b, v9.16b 4391abee99eSArd Biesheuvel tbl v0.16b, {v10.16b}, v8.16b 4401abee99eSArd Biesheuvel eor v12.16b, v2.16b, v9.16b 4411abee99eSArd Biesheuvel tbl v1.16b, {v11.16b}, v8.16b 4421abee99eSArd Biesheuvel eor v13.16b, v3.16b, v9.16b 4431abee99eSArd Biesheuvel tbl v2.16b, {v12.16b}, v8.16b 4441abee99eSArd Biesheuvel eor v14.16b, v4.16b, v9.16b 4451abee99eSArd Biesheuvel tbl v3.16b, {v13.16b}, v8.16b 4461abee99eSArd Biesheuvel eor v15.16b, v5.16b, v9.16b 
4471abee99eSArd Biesheuvel tbl v4.16b, {v14.16b}, v8.16b 4481abee99eSArd Biesheuvel eor v10.16b, v6.16b, v9.16b 4491abee99eSArd Biesheuvel tbl v5.16b, {v15.16b}, v8.16b 4501abee99eSArd Biesheuvel eor v11.16b, v7.16b, v9.16b 4511abee99eSArd Biesheuvel tbl v6.16b, {v10.16b}, v8.16b 4521abee99eSArd Biesheuvel tbl v7.16b, {v11.16b}, v8.16b 4531abee99eSArd Biesheuvel 4541abee99eSArd Biesheuvel bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 4551abee99eSArd Biesheuvel 4561abee99eSArd Biesheuvel sub rounds, rounds, #1 4571abee99eSArd Biesheuvel b .Lenc_sbox 4581abee99eSArd Biesheuvel 4591abee99eSArd Biesheuvel.Lenc_loop: 4601abee99eSArd Biesheuvel shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 4611abee99eSArd Biesheuvel.Lenc_sbox: 4621abee99eSArd Biesheuvel sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ 4631abee99eSArd Biesheuvel v13, v14, v15 4641abee99eSArd Biesheuvel subs rounds, rounds, #1 4651abee99eSArd Biesheuvel b.cc .Lenc_done 4661abee99eSArd Biesheuvel 4671abee99eSArd Biesheuvel enc_next_rk 4681abee99eSArd Biesheuvel 4691abee99eSArd Biesheuvel mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \ 4701abee99eSArd Biesheuvel v13, v14, v15 4711abee99eSArd Biesheuvel 4721abee99eSArd Biesheuvel add_round_key v0, v1, v2, v3, v4, v5, v6, v7 4731abee99eSArd Biesheuvel 4741abee99eSArd Biesheuvel b.ne .Lenc_loop 4751abee99eSArd Biesheuvel ldr q24, SRM0 4761abee99eSArd Biesheuvel b .Lenc_loop 4771abee99eSArd Biesheuvel 4781abee99eSArd Biesheuvel.Lenc_done: 4791abee99eSArd Biesheuvel ldr q12, [bskey] // last round key 4801abee99eSArd Biesheuvel 4811abee99eSArd Biesheuvel bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11 4821abee99eSArd Biesheuvel 4831abee99eSArd Biesheuvel eor v0.16b, v0.16b, v12.16b 4841abee99eSArd Biesheuvel eor v1.16b, v1.16b, v12.16b 4851abee99eSArd Biesheuvel eor v4.16b, v4.16b, v12.16b 4861abee99eSArd Biesheuvel eor v6.16b, v6.16b, v12.16b 4871abee99eSArd Biesheuvel eor v3.16b, v3.16b, v12.16b 
4881abee99eSArd Biesheuvel eor v7.16b, v7.16b, v12.16b 4891abee99eSArd Biesheuvel eor v2.16b, v2.16b, v12.16b 4901abee99eSArd Biesheuvel eor v5.16b, v5.16b, v12.16b 4911abee99eSArd Biesheuvel ret 4920e89640bSMark BrownSYM_FUNC_END(aesbs_encrypt8) 4931abee99eSArd Biesheuvel 4941abee99eSArd Biesheuvel .align 4 4950e89640bSMark BrownSYM_FUNC_START_LOCAL(aesbs_decrypt8) 4961abee99eSArd Biesheuvel lsl x9, rounds, #7 4971abee99eSArd Biesheuvel add bskey, bskey, x9 4981abee99eSArd Biesheuvel 4991abee99eSArd Biesheuvel ldr q9, [bskey, #-112]! // round 0 key 5001abee99eSArd Biesheuvel ldr q8, M0ISR 5011abee99eSArd Biesheuvel ldr q24, ISR 5021abee99eSArd Biesheuvel 5031abee99eSArd Biesheuvel eor v10.16b, v0.16b, v9.16b // xor with round0 key 5041abee99eSArd Biesheuvel eor v11.16b, v1.16b, v9.16b 5051abee99eSArd Biesheuvel tbl v0.16b, {v10.16b}, v8.16b 5061abee99eSArd Biesheuvel eor v12.16b, v2.16b, v9.16b 5071abee99eSArd Biesheuvel tbl v1.16b, {v11.16b}, v8.16b 5081abee99eSArd Biesheuvel eor v13.16b, v3.16b, v9.16b 5091abee99eSArd Biesheuvel tbl v2.16b, {v12.16b}, v8.16b 5101abee99eSArd Biesheuvel eor v14.16b, v4.16b, v9.16b 5111abee99eSArd Biesheuvel tbl v3.16b, {v13.16b}, v8.16b 5121abee99eSArd Biesheuvel eor v15.16b, v5.16b, v9.16b 5131abee99eSArd Biesheuvel tbl v4.16b, {v14.16b}, v8.16b 5141abee99eSArd Biesheuvel eor v10.16b, v6.16b, v9.16b 5151abee99eSArd Biesheuvel tbl v5.16b, {v15.16b}, v8.16b 5161abee99eSArd Biesheuvel eor v11.16b, v7.16b, v9.16b 5171abee99eSArd Biesheuvel tbl v6.16b, {v10.16b}, v8.16b 5181abee99eSArd Biesheuvel tbl v7.16b, {v11.16b}, v8.16b 5191abee99eSArd Biesheuvel 5201abee99eSArd Biesheuvel bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 5211abee99eSArd Biesheuvel 5221abee99eSArd Biesheuvel sub rounds, rounds, #1 5231abee99eSArd Biesheuvel b .Ldec_sbox 5241abee99eSArd Biesheuvel 5251abee99eSArd Biesheuvel.Ldec_loop: 5261abee99eSArd Biesheuvel shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24 5271abee99eSArd Biesheuvel.Ldec_sbox: 
5281abee99eSArd Biesheuvel inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \ 5291abee99eSArd Biesheuvel v13, v14, v15 5301abee99eSArd Biesheuvel subs rounds, rounds, #1 5311abee99eSArd Biesheuvel b.cc .Ldec_done 5321abee99eSArd Biesheuvel 5331abee99eSArd Biesheuvel dec_next_rk 5341abee99eSArd Biesheuvel 5351abee99eSArd Biesheuvel add_round_key v0, v1, v6, v4, v2, v7, v3, v5 5361abee99eSArd Biesheuvel 5371abee99eSArd Biesheuvel inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \ 5381abee99eSArd Biesheuvel v13, v14, v15 5391abee99eSArd Biesheuvel 5401abee99eSArd Biesheuvel b.ne .Ldec_loop 5411abee99eSArd Biesheuvel ldr q24, ISRM0 5421abee99eSArd Biesheuvel b .Ldec_loop 5431abee99eSArd Biesheuvel.Ldec_done: 5441abee99eSArd Biesheuvel ldr q12, [bskey, #-16] // last round key 5451abee99eSArd Biesheuvel 5461abee99eSArd Biesheuvel bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11 5471abee99eSArd Biesheuvel 5481abee99eSArd Biesheuvel eor v0.16b, v0.16b, v12.16b 5491abee99eSArd Biesheuvel eor v1.16b, v1.16b, v12.16b 5501abee99eSArd Biesheuvel eor v6.16b, v6.16b, v12.16b 5511abee99eSArd Biesheuvel eor v4.16b, v4.16b, v12.16b 5521abee99eSArd Biesheuvel eor v2.16b, v2.16b, v12.16b 5531abee99eSArd Biesheuvel eor v7.16b, v7.16b, v12.16b 5541abee99eSArd Biesheuvel eor v3.16b, v3.16b, v12.16b 5551abee99eSArd Biesheuvel eor v5.16b, v5.16b, v12.16b 5561abee99eSArd Biesheuvel ret 5570e89640bSMark BrownSYM_FUNC_END(aesbs_decrypt8) 5581abee99eSArd Biesheuvel 5591abee99eSArd Biesheuvel /* 5601abee99eSArd Biesheuvel * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 5611abee99eSArd Biesheuvel * int blocks) 5621abee99eSArd Biesheuvel * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 5631abee99eSArd Biesheuvel * int blocks) 5641abee99eSArd Biesheuvel */ 5651abee99eSArd Biesheuvel .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 56620ab6332SArd Biesheuvel frame_push 5 56720ab6332SArd Biesheuvel 
/*
 * NOTE(review): the statements up to the first .endm below are the tail of a
 * macro whose .macro header lies above this chunk; presumably __ecb_crypt,
 * which is instantiated further down with a cipher routine (do8) and the
 * eight registers (o0-o7) that hold its output blocks in order.
 */
	// Keep the arguments in x19-x23 (callee-saved under AAPCS64) so they
	// survive the bl below: x19 = output pointer, x20 = input pointer,
	// x21 = key schedule, x22 = rounds, x23 = remaining block count.
	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4

	// Build the short-batch marker in x5: when fewer than 8 blocks remain,
	// x5 keeps exactly bit <remaining> set and x23 is clamped to zero;
	// otherwise x5 is cleared and x23 is reduced by 8.
	// NOTE(review): a count of 0 sets only bit 0, which is never tested, so
	// eight blocks would be processed; callers presumably pass >= 1.
99:	mov		x5, #1
	lsl		x5, x5, x23
	subs		w23, w23, #8
	csel		x23, x23, xzr, pl
	csel		x5, x5, xzr, mi

	// Load up to eight input blocks, stopping as soon as the marker bit
	// matching the number of blocks loaded so far is set.
	ld1		{v0.16b}, [x20], #16
	tbnz		x5, #1, 0f
	ld1		{v1.16b}, [x20], #16
	tbnz		x5, #2, 0f
	ld1		{v2.16b}, [x20], #16
	tbnz		x5, #3, 0f
	ld1		{v3.16b}, [x20], #16
	tbnz		x5, #4, 0f
	ld1		{v4.16b}, [x20], #16
	tbnz		x5, #5, 0f
	ld1		{v5.16b}, [x20], #16
	tbnz		x5, #6, 0f
	ld1		{v6.16b}, [x20], #16
	tbnz		x5, #7, 0f
	ld1		{v7.16b}, [x20], #16

	// Hand key schedule and round count to the 8-way cipher routine.
	// x5 is tested again after the call, so the routine is relied upon to
	// leave it intact.
0:	mov		bskey, x21
	mov		rounds, x22
	bl		\do8

	// Store the results in block order (o0 first), leaving through 1f as
	// soon as the last block of a short batch has been written.
	st1		{\o0\().16b}, [x19], #16
	tbnz		x5, #1, 1f
	st1		{\o1\().16b}, [x19], #16
	tbnz		x5, #2, 1f
	st1		{\o2\().16b}, [x19], #16
	tbnz		x5, #3, 1f
	st1		{\o3\().16b}, [x19], #16
	tbnz		x5, #4, 1f
	st1		{\o4\().16b}, [x19], #16
	tbnz		x5, #5, 1f
	st1		{\o5\().16b}, [x19], #16
	tbnz		x5, #6, 1f
	st1		{\o6\().16b}, [x19], #16
	tbnz		x5, #7, 1f
	st1		{\o7\().16b}, [x19], #16

	// A full batch was written: loop while blocks remain.
	cbz		x23, 1f
	b		99b

1:	frame_pop
	ret
	.endm

	/*
	 * ECB entry points: the shared macro body instantiated with the 8-way
	 * encryption resp. decryption routine. The register lists give the
	 * order in which the output blocks are stored, which differs between
	 * the two directions.
	 */
	.align		4
SYM_TYPED_FUNC_START(aesbs_ecb_encrypt)
	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_ecb_encrypt)

	.align		4
SYM_TYPED_FUNC_START(aesbs_ecb_decrypt)
	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_ecb_decrypt)

	/*
	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks, u8 iv[])
	 *
	 * Decrypts up to eight blocks per iteration. The ciphertext blocks are
	 * kept in v25-v31 while they are being decrypted so that each one can
	 * be XORed into the following plaintext block; the IV buffer is read
	 * at the start of every batch and updated with the last ciphertext
	 * block consumed at the end of it.
	 */
	.align		4
SYM_FUNC_START(aesbs_cbc_decrypt)
	frame_push	6

	// x19 = out, x20 = in, x21 = rk, x22 = rounds, x23 = blocks, x24 = iv
	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5

	// Short-batch marker in x6: bit <remaining> is set when fewer than 8
	// blocks remain (x23 is then clamped to zero), otherwise x6 is zero and
	// x23 is reduced by 8.
	// NOTE(review): as bit 0 is never tested, a count of 0 would process a
	// full batch; callers presumably pass >= 1.
99:	mov		x6, #1
	lsl		x6, x6, x23
	subs		w23, w23, #8
	csel		x23, x23, xzr, pl
	csel		x6, x6, xzr, mi

	// Load the ciphertext and keep a copy of blocks 0-6 in v25-v31.
	ld1		{v0.16b}, [x20], #16
	mov		v25.16b, v0.16b
	tbnz		x6, #1, 0f
	ld1		{v1.16b}, [x20], #16
	mov		v26.16b, v1.16b
	tbnz		x6, #2, 0f
	ld1		{v2.16b}, [x20], #16
	mov		v27.16b, v2.16b
	tbnz		x6, #3, 0f
	ld1		{v3.16b}, [x20], #16
	mov		v28.16b, v3.16b
	tbnz		x6, #4, 0f
	ld1		{v4.16b}, [x20], #16
	mov		v29.16b, v4.16b
	tbnz		x6, #5, 0f
	ld1		{v5.16b}, [x20], #16
	mov		v30.16b, v5.16b
	tbnz		x6, #6, 0f
	ld1		{v6.16b}, [x20], #16
	mov		v31.16b, v6.16b
	tbnz		x6, #7, 0f
	// No post-increment here: block 7 is read a second time further down,
	// where it becomes the IV for the next batch.
	ld1		{v7.16b}, [x20]

0:	mov		bskey, x21
	mov		rounds, x22
	bl		aesbs_decrypt8

	ld1		{v24.16b}, [x24]		// load IV

	// The decrypted blocks come back in the order v0, v1, v6, v4, v2, v7,
	// v3, v5. XOR block 0 with the IV and every later block with the
	// ciphertext block that preceded it.
	eor		v1.16b, v1.16b, v25.16b
	eor		v6.16b, v6.16b, v26.16b
	eor		v4.16b, v4.16b, v27.16b
	eor		v2.16b, v2.16b, v28.16b
	eor		v7.16b, v7.16b, v29.16b
	eor		v0.16b, v0.16b, v24.16b
	eor		v3.16b, v3.16b, v30.16b
	eor		v5.16b, v5.16b, v31.16b

	// Store the plaintext in block order. After each store v24 is set to
	// the ciphertext block just consumed, so that whichever exit is taken
	// v24 holds the IV for the next call.
	st1		{v0.16b}, [x19], #16
	mov		v24.16b, v25.16b
	tbnz		x6, #1, 1f
	st1		{v1.16b}, [x19], #16
	mov		v24.16b, v26.16b
	tbnz		x6, #2, 1f
	st1		{v6.16b}, [x19], #16
	mov		v24.16b, v27.16b
	tbnz		x6, #3, 1f
	st1		{v4.16b}, [x19], #16
	mov		v24.16b, v28.16b
	tbnz		x6, #4, 1f
	st1		{v2.16b}, [x19], #16
	mov		v24.16b, v29.16b
	tbnz		x6, #5, 1f
	st1		{v7.16b}, [x19], #16
	mov		v24.16b, v30.16b
	tbnz		x6, #6, 1f
	st1		{v3.16b}, [x19], #16
	mov		v24.16b, v31.16b
	tbnz		x6, #7, 1f
	// Full batch: re-read ciphertext block 7 (no register copy was kept)
	// and step the input pointer past it.
	ld1		{v24.16b}, [x20], #16
	st1		{v5.16b}, [x19], #16
1:	st1		{v24.16b}, [x24]		// store IV

	cbz		x23, 2f
	b		99b

2:	frame_pop
	ret
SYM_FUNC_END(aesbs_cbc_decrypt)

	/*
	 * next_tweak - derive the next XTS tweak: out = in * x in GF(2^128)
	 *
	 * Each 64-bit half is doubled with an add; tmp collects, per half, an
	 * all-ones mask if its top bit was set. The mask is ANDed with const
	 * (which must hold 0x1 in the low and 0x87 in the high 64-bit lane),
	 * the halves are swapped and the result is XORed in, which carries the
	 * low half into bit 0 of the high half and folds a carry out of the
	 * high half back into the low half as 0x87.
	 *
	 * out must not be the same register as tmp; in is left unmodified
	 * unless it is also used as out.
	 */
	.macro		next_tweak, out, in, const, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, \const\().16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	/*
	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks, u8 iv[])
	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks, u8 iv[])
	 */

	/*
	 * __xts_crypt8 - apply the input tweaks for one batch of eight blocks
	 * and tail-call the 8-way cipher routine.
	 *
	 * In:	x1  = input pointer (advanced by 128 bytes)
	 *	x2  = key schedule, x3 = rounds (forwarded as bskey/rounds)
	 *	x6  = 32 bytes of scratch memory
	 *	x16 = address of the cipher routine to branch to
	 *	v25 = tweak for the first block of the batch
	 *
	 * On exit v25-v31 hold the tweaks of blocks 0-6, [x6] the tweak of
	 * block 7 and [x6, #16] the first tweak of the following batch.
	 * v18 and v19 are used as tweak constant and temporary.
	 */
SYM_FUNC_START_LOCAL(__xts_crypt8)
	// Build the next_tweak constant: 64-bit lanes { 0x1, 0x87 } in v18.
	movi		v18.2s, #0x1
	movi		v19.2s, #0x87
	uzp1		v18.4s, v18.4s, v19.4s

	ld1		{v0.16b-v3.16b}, [x1], #64
	ld1		{v4.16b-v7.16b}, [x1], #64

	// Tweak chain: v25 -> v26 -> ... -> v31 -> v16 -> v17.
	next_tweak	v26, v25, v18, v19
	next_tweak	v27, v26, v18, v19
	next_tweak	v28, v27, v18, v19
	next_tweak	v29, v28, v18, v19
	next_tweak	v30, v29, v18, v19
	next_tweak	v31, v30, v18, v19
	next_tweak	v16, v31, v18, v19
	next_tweak	v17, v16, v18, v19

	eor		v0.16b, v0.16b, v25.16b
	eor		v1.16b, v1.16b, v26.16b
	eor		v2.16b, v2.16b, v27.16b
	eor		v3.16b, v3.16b, v28.16b
	eor		v4.16b, v4.16b, v29.16b
	eor		v5.16b, v5.16b, v30.16b
	eor		v6.16b, v6.16b, v31.16b
	eor		v7.16b, v7.16b, v16.16b

	// Spill the last two tweaks; the caller reloads them from [x6].
	// NOTE(review): presumably needed because the cipher routine uses
	// v16/v17 as scratch - confirm against aesbs_encrypt8/aesbs_decrypt8.
	stp		q16, q17, [x6]

	mov		bskey, x2
	mov		rounds, x3
	// Tail call: br leaves the link register untouched, so the cipher
	// routine returns straight to the caller of __xts_crypt8.
	br		x16
SYM_FUNC_END(__xts_crypt8)

	/*
	 * __xts_crypt - body shared by aesbs_xts_encrypt and aesbs_xts_decrypt
	 *
	 * do8 is the 8-way cipher routine, o0-o7 are the registers holding its
	 * output blocks in block order. Register use: x0 = out, x1 = in,
	 * x2 = rk, x3 = rounds, x4 = blocks, x5 = iv (current tweak, updated
	 * on return).
	 *
	 * Every iteration reads and writes eight full blocks; there is no
	 * handling of a short final batch, so the caller presumably passes a
	 * positive multiple of 8.
	 * NOTE(review): blocks is declared int but the loop tests the 64-bit
	 * x4; AAPCS64 leaves the upper 32 bits of an int argument unspecified,
	 * so confirm the callers or test w4 instead.
	 */
	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	// 32 bytes of frame-local storage, addressed through x6, for the two
	// tweaks that __xts_crypt8 spills.
	frame_push	0, 32
	add		x6, sp, #.Lframe_local_offset

	ld1		{v25.16b}, [x5]

0:	adr		x16, \do8
	bl		__xts_crypt8

	// Apply the output tweaks, collecting the results in v16-v23 in block
	// order.
	eor		v16.16b, \o0\().16b, v25.16b
	eor		v17.16b, \o1\().16b, v26.16b
	eor		v18.16b, \o2\().16b, v27.16b
	eor		v19.16b, \o3\().16b, v28.16b

	// v24 = tweak of block 7, v25 = first tweak of the next batch. This
	// must come after the last use of the old v25 just above.
	ldp		q24, q25, [x6]

	eor		v20.16b, \o4\().16b, v29.16b
	eor		v21.16b, \o5\().16b, v30.16b
	eor		v22.16b, \o6\().16b, v31.16b
	eor		v23.16b, \o7\().16b, v24.16b

	st1		{v16.16b-v19.16b}, [x0], #64
	st1		{v20.16b-v23.16b}, [x0], #64

	subs		x4, x4, #8
	b.gt		0b

	// Hand the next tweak back through the iv buffer.
	st1		{v25.16b}, [x5]
	frame_pop
	ret
	.endm

SYM_TYPED_FUNC_START(aesbs_xts_encrypt)
	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_xts_encrypt)

SYM_TYPED_FUNC_START(aesbs_xts_decrypt)
	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_xts_decrypt)

	/*
	 * next_ctr - emit the current counter value into v and increment it
	 *
	 * The 128-bit counter lives in x7 (upper 64 bits) and x8 (lower 64
	 * bits) as an ordinary integer. The second mov sits between adds and
	 * adc on purpose: it does not touch the flags and still reads x7
	 * before the carry is added. rev64 then byte-reverses each 64-bit
	 * half of v.
	 * NOTE(review): this presumably restores the big-endian layout of the
	 * IV on little-endian kernels - confirm for big-endian builds.
	 */
	.macro		next_ctr, v
	mov		\v\().d[1], x8
	adds		x8, x8, #1
	mov		\v\().d[0], x7
	adc		x7, x7, xzr
	rev64		\v\().16b, \v\().16b
	.endm

	/*
	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
	 *		     int rounds, int blocks, u8 iv[])
	 *
	 * Encrypts eight consecutive counter values per iteration and XORs the
	 * result with eight input blocks. Every iteration reads and writes
	 * eight full blocks, so the caller presumably passes a positive
	 * multiple of 8. On return the iv buffer holds the counter value
	 * following the last one used.
	 * NOTE(review): blocks is declared int but the loop tests the 64-bit
	 * x4; AAPCS64 leaves the upper 32 bits of an int argument unspecified,
	 * so confirm the callers or test w4 instead.
	 */
SYM_FUNC_START(aesbs_ctr_encrypt)
	frame_push	0
	// v0 takes the IV as is (the counter value for block 0); x7:x8 take
	// it as an integer, byte-swapped where CPU_LE() emits its argument
	// (presumably on little-endian kernels only), and are advanced by one.
	ldp		x7, x8, [x5]
	ld1		{v0.16b}, [x5]
CPU_LE(	rev		x7, x7		)
CPU_LE(	rev		x8, x8		)
	adds		x8, x8, #1
	adc		x7, x7, xzr

	// Counter values for blocks 1-7 of this batch.
0:	next_ctr	v1
	next_ctr	v2
	next_ctr	v3
	next_ctr	v4
	next_ctr	v5
	next_ctr	v6
	next_ctr	v7

	mov		bskey, x2
	mov		rounds, x3
	bl		aesbs_encrypt8

	ld1		{ v8.16b-v11.16b}, [x1], #64
	ld1		{v12.16b-v15.16b}, [x1], #64

	// The key stream comes back in the order v0, v1, v4, v6, v3, v7, v2,
	// v5; XOR it with the input blocks in block order.
	eor		v8.16b, v0.16b, v8.16b
	eor		v9.16b, v1.16b, v9.16b
	eor		v10.16b, v4.16b, v10.16b
	eor		v11.16b, v6.16b, v11.16b
	eor		v12.16b, v3.16b, v12.16b
	eor		v13.16b, v7.16b, v13.16b
	eor		v14.16b, v2.16b, v14.16b
	eor		v15.16b, v5.16b, v15.16b

	st1		{ v8.16b-v11.16b}, [x0], #64
	st1		{v12.16b-v15.16b}, [x0], #64

	// Counter value for block 0 of the next batch, which is also the IV
	// handed back once the loop ends.
	next_ctr	v0
	subs		x4, x4, #8
	b.gt		0b

	st1		{v0.16b}, [x5]
	frame_pop
	ret
SYM_FUNC_END(aesbs_ctr_encrypt)