/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text

	/*
	 * Register aliases consumed by aesbs_encrypt8/aesbs_decrypt8. The
	 * callers in this file load them right before the call
	 * (mov bskey, x21 / mov rounds, x22).
	 */
	rounds		.req	x11
	bskey		.req	x12

	/*
	 * in_bs_ch: XOR-only transform of the eight bit planes, applied in
	 * place as the first step of the sbox macro. All operands are full
	 * vector operands (sbox appends the .16b arrangement).
	 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b2, \b2, \b1
	eor		\b5, \b5, \b6
	eor		\b3, \b3, \b0
	eor		\b6, \b6, \b2
	eor		\b5, \b5, \b0
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b4, \b4, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b1, \b1, \b5
	.endm

	/*
	 * out_bs_ch: XOR-only transform of the eight bit planes, applied in
	 * place as the last step of the sbox macro (after inv_gf256).
	 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	eor		\b4, \b4, \b6
	eor		\b2, \b2, \b0
	eor		\b6, \b6, \b1
	eor		\b1, \b1, \b5
	eor		\b5, \b5, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b2, \b2, \b5
	eor		\b4, \b4, \b7
	.endm

	/*
	 * inv_in_bs_ch: XOR-only transform applied in place as the first step
	 * of the inv_sbox macro. Note that the formal parameters are declared
	 * in permuted order (b6, b1, b2, b4, b7, b0, b3, b5).
	 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	eor		\b1, \b1, \b7
	eor		\b4, \b4, \b7
	eor		\b7, \b7, \b5
	eor		\b1, \b1, \b3
	eor		\b2, \b2, \b5
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b1
	eor		\b2, \b2, \b0
	eor		\b5, \b5, \b3
	eor		\b4, \b4, \b6
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	.endm

	/*
	 * inv_out_bs_ch: XOR-only transform applied in place as the last step
	 * of the inv_sbox macro. Formal parameters are declared in permuted
	 * order (b6, b5, b0, b3, b7, b1, b4, b2).
	 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	eor		\b1, \b1, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b4, \b4, \b5
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b5, \b5, \b0
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b2
	eor		\b2, \b2, \b1
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b0
	eor		\b5, \b5, \b6
	.endm

	/*
	 * mul_gf4: combine the pair (x0, x1) with the pair (y0, y1) using
	 * AND/XOR only; the result replaces x0/x1, y0/y1 are left untouched
	 * and t0/t1 are scratch. Judging by the name this is a bit-sliced
	 * GF(2^2) multiplication -- confirm against the referenced paper.
	 *
	 *   x1' = (x1 & y0) ^ ((y0 ^ y1) & x0)
	 *   x0' = ((x0 ^ x1) & y1) ^ (x1 & y0)
	 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	eor 		\t0, \y0, \y1
	and		\t0, \t0, \x0
	eor		\x0, \x0, \x1
	and		\t1, \x1, \y0
	and		\x0, \x0, \y1
	eor		\x1, \t1, \t0
	eor		\x0, \x0, \t1
	.endm

	/*
	 * mul_gf4_n_gf4: two interleaved products. (x2, x3) * (y2, y3) is
	 * computed with the same formula as mul_gf4. (x0, x1) * (y0, y1) uses
	 * a different output combination (presumably the "times N" variant
	 * the name refers to -- confirm against the referenced paper):
	 *
	 *   x1' = (x1 & y0) ^ ((x0 ^ x1) & y1)
	 *   x0' = ((x0 ^ x1) & y1) ^ ((y0 ^ y1) & x0)
	 *
	 * The y operands are preserved; t0/t1 are scratch.
	 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	eor		\t0, \y0, \y1
	eor 		\t1, \y2, \y3
	and		\t0, \t0, \x0
	and		\t1, \t1, \x2
	eor		\x0, \x0, \x1
	eor		\x2, \x2, \x3
	and		\x1, \x1, \y0
	and		\x3, \x3, \y2
	and		\x0, \x0, \y1
	and		\x2, \x2, \y3
	eor		\x1, \x1, \x0
	eor		\x2, \x2, \x3
	eor		\x0, \x0, \t0
	eor		\x3, \x3, \t1
	.endm

	/*
	 * mul_gf16_2: combine each of the two 4-plane groups x0-x3 and x4-x7
	 * with y0-y3, built from mul_gf4 and mul_gf4_n_gf4. Results replace
	 * x0-x7. y0/y1 are temporarily XORed with y2/y3 and XORed back before
	 * the final mul_gf4, so y0-y3 hold their input values on exit.
	 * t0-t3 are scratch.
	 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
				    y0, y1, y2, y3, t0, t1, t2, t3
	eor		\t0, \x0, \x2
	eor		\t1, \x1, \x3
	mul_gf4  	\x0, \x1, \y0, \y1, \t2, \t3
	eor		\y0, \y0, \y2
	eor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	eor		\x0, \x0, \t0
	eor		\x2, \x2, \t0
	eor		\x1, \x1, \t1
	eor		\x3, \x3, \t1
	eor		\t0, \x4, \x6
	eor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	eor		\y0, \y0, \y2
	eor		\y1, \y1, \y3
	mul_gf4  	\x4, \x5, \y0, \y1, \t2, \t3
	eor		\x4, \x4, \t0
	eor		\x6, \x6, \t0
	eor		\x5, \x5, \t1
	eor		\x7, \x7, \t1
	.endm

	/*
	 * inv_gf256: non-linear core shared by sbox and inv_sbox. Operates in
	 * place on the eight planes x0-x7 using AND/ORR/EOR/NOT/BSL and ends
	 * with a mul_gf16_2 of x0-x7 by the four derived planes
	 * (s3, s2, s1, t1). Judging by the name this is the bit-sliced
	 * GF(2^8) inversion -- confirm against the referenced paper.
	 * t0-t3 and s0-s3 are clobbered.
	 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
				   t0, t1, t2, t3, s0, s1, s2, s3
	eor		\t3, \x4, \x6
	eor		\t0, \x5, \x7
	eor		\t1, \x1, \x3
	eor		\s1, \x7, \x6
	eor		\s0, \x0, \x2
	eor		\s3, \t3, \t0
	orr		\t2, \t0, \t1
	and		\s2, \t3, \s0
	orr		\t3, \t3, \s0
	eor		\s0, \s0, \t1
	and		\t0, \t0, \t1
	eor		\t1, \x3, \x2
	and		\s3, \s3, \s0
	and		\s1, \s1, \t1
	eor		\t1, \x4, \x5
	eor		\s0, \x1, \x0
	eor		\t3, \t3, \s1
	eor		\t2, \t2, \s1
	and		\s1, \t1, \s0
	orr		\t1, \t1, \s0
	eor		\t3, \t3, \s3
	eor		\t0, \t0, \s1
	eor		\t2, \t2, \s2
	eor		\t1, \t1, \s3
	eor		\t0, \t0, \s2
	and		\s0, \x7, \x3
	eor		\t1, \t1, \s2
	and		\s1, \x6, \x2
	and		\s2, \x5, \x1
	orr		\s3, \x4, \x0
	eor		\t3, \t3, \s0
	eor		\t1, \t1, \s2
	eor		\s0, \t0, \s3
	eor		\t2, \t2, \s1
	and		\s2, \t3, \t1
	eor		\s1, \t2, \s2
	eor		\s3, \s0, \s2
	bsl		\s1, \t1, \s0
	not		\t0, \s0
	bsl		\s0, \s1, \s3
	bsl		\t0, \s1, \s3
	bsl		\s3, \t3, \t2
	eor		\t3, \t3, \t2
	and		\s2, \s0, \s3
	eor		\t1, \t1, \t0
	eor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm

	/*
	 * sbox: forward substitution on eight bit-sliced planes b0-b7 (plain
	 * vector register names, .16b is appended here). Chains in_bs_ch,
	 * inv_gf256 and out_bs_ch; t0-t3 and s0-s3 are scratch registers.
	 * The planes come out permuted: aesbs_encrypt8 continues with the
	 * register order v0, v1, v4, v6, v3, v7, v2, v5 after invoking it on
	 * v0-v7.
	 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
	.endm

	/*
	 * inv_sbox: counterpart of sbox used by aesbs_decrypt8. Chains
	 * inv_in_bs_ch, inv_gf256 and inv_out_bs_ch. The planes come out
	 * permuted: aesbs_decrypt8 continues with the register order
	 * v0, v1, v6, v4, v2, v7, v3, v5 after invoking it on v0-v7.
	 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
				  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
	.endm

	/*
	 * enc_next_rk: load the next 128-byte bit-sliced round key into
	 * v16-v23 and advance bskey by 128 (post-indexed first load, the
	 * remaining loads use negative offsets from the updated pointer).
	 */
	.macro		enc_next_rk
	ldp		q16, q17, [bskey], #128
	ldp		q18, q19, [bskey, #-96]
	ldp		q20, q21, [bskey, #-64]
	ldp		q22, q23, [bskey, #-32]
	.endm

	/*
	 * dec_next_rk: step bskey back by 128 (pre-indexed first load) and
	 * load the 128-byte bit-sliced round key found there into v16-v23.
	 */
	.macro		dec_next_rk
	ldp		q16, q17, [bskey, #-128]!
	ldp		q18, q19, [bskey, #32]
	ldp		q20, q21, [bskey, #64]
	ldp		q22, q23, [bskey, #96]
	.endm

	/*
	 * add_round_key: XOR the planes x0-x7 with v16-v23, i.e. with the
	 * round key most recently loaded by enc_next_rk/dec_next_rk.
	 */
	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	eor		\x0\().16b, \x0\().16b, v16.16b
	eor		\x1\().16b, \x1\().16b, v17.16b
	eor		\x2\().16b, \x2\().16b, v18.16b
	eor		\x3\().16b, \x3\().16b, v19.16b
	eor		\x4\().16b, \x4\().16b, v20.16b
	eor		\x5\().16b, \x5\().16b, v21.16b
	eor		\x6\().16b, \x6\().16b, v22.16b
	eor		\x7\().16b, \x7\().16b, v23.16b
	.endm

	/*
	 * shift_rows: apply the same byte permutation (TBL with \mask as the
	 * index vector) to each of the eight planes, in place.
	 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b
	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
	.endm

	/*
	 * mix_cols: column mixing on the eight planes x0-x7, built from
	 * whole-vector byte rotations (EXT with both sources equal, by 12 and
	 * by 8 bytes) and XORs. t0-t7 are scratch.
	 *
	 * The optional trailing argument \inv selects the final output
	 * assignment: when it is blank (encryption path) the .ifb branch is
	 * assembled; inv_mix_cols passes 1 to select the .else branch, which
	 * leaves the results in a different register order.
	 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
	eor		\x2\().16b, \x2\().16b, \t2\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
	eor		\x3\().16b, \x3\().16b, \t3\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
	eor		\x4\().16b, \x4\().16b, \t4\().16b
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
	eor		\x5\().16b, \x5\().16b, \t5\().16b
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
	eor		\x6\().16b, \x6\().16b, \t6\().16b
	eor		\t1\().16b, \t1\().16b, \x0\().16b
	eor		\x7\().16b, \x7\().16b, \t7\().16b
	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x1\().16b
	eor		\t0\().16b, \t0\().16b, \x7\().16b
	eor		\t1\().16b, \t1\().16b, \x7\().16b
	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t5\().16b, \t5\().16b, \x4\().16b
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	eor		\t6\().16b, \t6\().16b, \x5\().16b
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x3\().16b
	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x6\().16b
	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x2\().16b
	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x7\().16b
	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x7\().16b
	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
	eor		\x7\().16b, \t1\().16b, \t5\().16b
	.ifb		\inv
	eor		\x2\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t3\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t2\().16b
	.else
	eor		\t3\().16b, \t3\().16b, \x4\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x2\().16b, \x3\().16b, \t6\().16b
	eor		\x3\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x6\().16b, \t2\().16b
	mov		\x6\().16b, \t3\().16b
	.endif
	.endm

	/*
	 * inv_mix_cols: inverse column mixing used by aesbs_decrypt8. A
	 * pre-processing pass of 8-byte rotations and XORs is followed by the
	 * forward mix_cols with its \inv argument set to 1. t0-t7 are scratch.
	 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				      t0, t1, t2, t3, t4, t5, t6, t7
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t0\().16b, \t0\().16b, \x0\().16b
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t6\().16b, \t6\().16b, \x6\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x7\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t1\().16b, \t1\().16b, \x1\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x2\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x3\().16b
	eor		\t4\().16b, \t4\().16b, \x4\().16b
	eor		\t5\().16b, \t5\().16b, \x5\().16b
	eor		\x0\().16b, \x0\().16b, \t6\().16b
	eor		\x1\().16b, \x1\().16b, \t6\().16b
	eor		\x2\().16b, \x2\().16b, \t0\().16b
	eor		\x4\().16b, \x4\().16b, \t2\().16b
	eor		\x3\().16b, \x3\().16b, \t1\().16b
	eor		\x1\().16b, \x1\().16b, \t7\().16b
	eor		\x2\().16b, \x2\().16b, \t7\().16b
	eor		\x4\().16b, \x4\().16b, \t6\().16b
	eor		\x5\().16b, \x5\().16b, \t3\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t7\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x7\().16b, \x7\().16b, \t5\().16b
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm

	/*
	 * swapmove_2x: for each of the two register pairs (a0, b0) and
	 * (a1, b1), exchange the bits of a selected by \mask with the bits of
	 * b selected by (\mask << \n):
	 *
	 *   t  = ((b >> n) ^ a) & mask
	 *   a ^= t
	 *   b ^= t << n
	 *
	 * The shifts operate on 64-bit lanes (.2d). t0/t1 are scratch.
	 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	ushr		\t0\().2d, \b0\().2d, #\n
	ushr		\t1\().2d, \b1\().2d, #\n
	eor		\t0\().16b, \t0\().16b, \a0\().16b
	eor		\t1\().16b, \t1\().16b, \a1\().16b
	and		\t0\().16b, \t0\().16b, \mask\().16b
	and		\t1\().16b, \t1\().16b, \mask\().16b
	eor		\a0\().16b, \a0\().16b, \t0\().16b
	shl		\t0\().2d, \t0\().2d, #\n
	eor		\a1\().16b, \a1\().16b, \t1\().16b
	shl		\t1\().2d, \t1\().2d, #\n
	eor		\b0\().16b, \b0\().16b, \t0\().16b
	eor		\b1\().16b, \b1\().16b, \t1\().16b
	.endm

	/*
	 * bitslice: transpose bits between the eight registers using three
	 * rounds of swapmove_2x with (shift, mask) = (1, 0x55), (2, 0x33) and
	 * (4, 0x0f). The same macro is used on entry and on exit of
	 * aesbs_encrypt8/aesbs_decrypt8. Note that the formal parameters are
	 * declared in descending order (x7 first). t0-t3 are scratch.
	 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	movi		\t0\().16b, #0x55
	movi		\t1\().16b, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	movi		\t0\().16b, #0x0f
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm


	/*
	 * TBL index vectors. M0 is used by aesbs_convert_key; M0SR, SR and
	 * SRM0 by aesbs_encrypt8; M0ISR, ISR and ISRM0 by aesbs_decrypt8.
	 */
	.align		6
M0:	.octa		0x0004080c0105090d02060a0e03070b0f

M0SR:	.octa		0x0004080c05090d010a0e02060f03070b
SR:	.octa		0x0f0e0d0c0a09080b0504070600030201
SRM0:	.octa		0x01060b0c0207080d0304090e00050a0f

M0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
ISR:	.octa		0x0f0e0d0c080b0a090504070602010003
ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f

	/*
	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
	 *
	 * Convert an expanded key (rounds + 1 round keys of 16 bytes, read
	 * from x1) into the layout consumed by aesbs_encrypt8/aesbs_decrypt8
	 * and write it to x0:
	 *
	 *   - round 0 key: copied unmodified (16 bytes)
	 *   - rounds - 1 inner round keys: bytes permuted through M0, then
	 *     every bit expanded into a full byte mask with CMTST, giving
	 *     eight vectors (128 bytes) per round key. Planes 0, 1, 5 and 6
	 *     (the set bits of 0x63) are stored inverted.
	 *   - last round key: XORed with 0x63 in every byte (16 bytes)
	 *
	 * Clobbers v0-v17, x0-x2 and the condition flags.
	 */
SYM_FUNC_START(aesbs_convert_key)
	ld1		{v7.4s}, [x1], #16		// load round 0 key
	ld1		{v17.4s}, [x1], #16		// load round 1 key

	movi		v8.16b,  #0x01			// bit masks
	movi		v9.16b,  #0x02
	movi		v10.16b, #0x04
	movi		v11.16b, #0x08
	movi		v12.16b, #0x10
	movi		v13.16b, #0x20
	movi		v14.16b, #0x40
	movi		v15.16b, #0x80
	ldr		q16, M0

	sub		x2, x2, #1
	str		q7, [x0], #16		// save round 0 key

.Lkey_loop:
	tbl		v7.16b ,{v17.16b}, v16.16b
	ld1		{v17.4s}, [x1], #16		// load next round key

	cmtst		v0.16b, v7.16b, v8.16b
	cmtst		v1.16b, v7.16b, v9.16b
	cmtst		v2.16b, v7.16b, v10.16b
	cmtst		v3.16b, v7.16b, v11.16b
	cmtst		v4.16b, v7.16b, v12.16b
	cmtst		v5.16b, v7.16b, v13.16b
	cmtst		v6.16b, v7.16b, v14.16b
	cmtst		v7.16b, v7.16b, v15.16b
	not		v0.16b, v0.16b
	not		v1.16b, v1.16b
	not		v5.16b, v5.16b
	not		v6.16b, v6.16b

	subs		x2, x2, #1
	stp		q0, q1, [x0], #128
	stp		q2, q3, [x0, #-96]
	stp		q4, q5, [x0, #-64]
	stp		q6, q7, [x0, #-32]
	b.ne		.Lkey_loop

	movi		v7.16b, #0x63			// compose .L63
	eor		v17.16b, v17.16b, v7.16b
	str		q17, [x0]
	ret
SYM_FUNC_END(aesbs_convert_key)

	/*
	 * aesbs_encrypt8: encrypt the eight blocks held in v0-v7.
	 *
	 * In:   v0-v7  input blocks
	 *       bskey  key schedule as written by aesbs_convert_key
	 *       rounds number of rounds
	 * Out:  the eight result blocks, in the register order
	 *       v0, v1, v4, v6, v3, v7, v2, v5
	 *
	 * Clobbers v8-v24, bskey, rounds and the condition flags.
	 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
	ldr		q9, [bskey], #16		// round 0 key
	ldr		q8, M0SR
	ldr		q24, SR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Lenc_sbox

.Lenc_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Lenc_done			// counter was already 0

	enc_next_rk

	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7

	/*
	 * The flags still come from the subs above: none of the macros in
	 * between sets them. When the counter has just reached zero, the
	 * next (final) pass uses the SRM0 mask instead of SR.
	 */
	b.ne		.Lenc_loop
	ldr		q24, SRM0
	b		.Lenc_loop

.Lenc_done:
	ldr		q12, [bskey]			// last round key

	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_encrypt8)

	/*
	 * aesbs_decrypt8: decrypt the eight blocks held in v0-v7.
	 *
	 * In:   v0-v7  input blocks
	 *       bskey  key schedule as written by aesbs_convert_key
	 *       rounds number of rounds
	 * Out:  the eight result blocks, in the register order
	 *       v0, v1, v6, v4, v2, v7, v3, v5
	 *
	 * The key schedule is walked backwards: bskey is first advanced by
	 * rounds * 128 bytes and then stepped down by dec_next_rk.
	 *
	 * Clobbers v8-v24, x9, bskey, rounds and the condition flags.
	 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
	lsl		x9, rounds, #7
	add		bskey, bskey, x9

	ldr		q9, [bskey, #-112]!		// round 0 key
	ldr		q8, M0ISR
	ldr		q24, ISR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Ldec_sbox

.Ldec_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Ldec_done			// counter was already 0

	dec_next_rk

	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5

	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	/*
	 * The flags still come from the subs above. When the counter has
	 * just reached zero, the next (final) pass uses the ISRM0 mask
	 * instead of ISR.
	 */
	b.ne		.Ldec_loop
	ldr		q24, ISRM0
	b		.Ldec_loop
.Ldec_done:
	ldr		q12, [bskey, #-16]		// last round key

	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_decrypt8)

	/*
	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks)
	 */
	/*
	 * __ecb_crypt: shared body of the two ECB entry points.
	 *
	 *   do8    eight-block primitive to call (aesbs_encrypt8/decrypt8)
	 *   o0-o7  registers in which that primitive returns blocks 0..7
	 *
	 * The arguments are copied to x19-x23 (frame_push 5 / frame_pop come
	 * from asm/assembler.h and presumably preserve those registers --
	 * confirm there). Each pass of the loop at 99: handles up to eight
	 * blocks. For a final partial pass x5 is 1 << blocks, so the tbnz
	 * chains stop loading/storing after exactly that many blocks; for a
	 * full pass x5 is 0 and all eight blocks are processed. x5 has to
	 * survive the call to \do8: the two primitives in this file only use
	 * x9, x11 (rounds) and x12 (bskey) as general purpose registers.
	 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	frame_push	5

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4

99:	mov		x5, #1
	lsl		x5, x5, x23
	subs		w23, w23, #8
	csel		x23, x23, xzr, pl
	csel		x5, x5, xzr, mi

	ld1		{v0.16b}, [x20], #16
	tbnz		x5, #1, 0f
	ld1		{v1.16b}, [x20], #16
	tbnz		x5, #2, 0f
	ld1		{v2.16b}, [x20], #16
	tbnz		x5, #3, 0f
	ld1		{v3.16b}, [x20], #16
	tbnz		x5, #4, 0f
	ld1		{v4.16b}, [x20], #16
	tbnz		x5, #5, 0f
	ld1		{v5.16b}, [x20], #16
	tbnz		x5, #6, 0f
	ld1		{v6.16b}, [x20], #16
	tbnz		x5, #7, 0f
	ld1		{v7.16b}, [x20], #16

0:	mov		bskey, x21
	mov		rounds, x22
	bl		\do8

	st1		{\o0\().16b}, [x19], #16
	tbnz		x5, #1, 1f
	st1		{\o1\().16b}, [x19], #16
	tbnz		x5, #2, 1f
	st1		{\o2\().16b}, [x19], #16
	tbnz		x5, #3, 1f
	st1		{\o3\().16b}, [x19], #16
	tbnz		x5, #4, 1f
	st1		{\o4\().16b}, [x19], #16
	tbnz		x5, #5, 1f
	st1		{\o5\().16b}, [x19], #16
	tbnz		x5, #6, 1f
	st1		{\o6\().16b}, [x19], #16
	tbnz		x5, #7, 1f
	st1		{\o7\().16b}, [x19], #16

	cbz		x23, 1f
	cond_yield_neon
	b		99b

1:	frame_pop
	ret
	.endm

	.align		4
SYM_FUNC_START(aesbs_ecb_encrypt)
	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_ecb_encrypt)

	.align		4
SYM_FUNC_START(aesbs_ecb_decrypt)
	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_ecb_decrypt)

	/*
	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		     int blocks, u8 iv[])
	 *
	 * Same block-count handling as __ecb_crypt, with x6 as the partial
	 * pass bit mask. A copy of input blocks 0..6 is kept in v25-v31 so
	 * that, after aesbs_decrypt8, each output block can be XORed with
	 * the preceding input block (the IV at x24 for block 0). v24 tracks
	 * the most recent input block that was consumed.
	 */
	.align		4
SYM_FUNC_START(aesbs_cbc_decrypt)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x5

99:	mov		x6, #1
	lsl		x6, x6, x23
	subs		w23, w23, #8
	csel		x23, x23, xzr, pl
	csel		x6, x6, xzr, mi

	ld1		{v0.16b}, [x20], #16
	mov		v25.16b, v0.16b
	tbnz		x6, #1, 0f
	ld1		{v1.16b}, [x20], #16
	mov		v26.16b, v1.16b
	tbnz		x6, #2, 0f
	ld1		{v2.16b}, [x20], #16
	mov		v27.16b, v2.16b
	tbnz		x6, #3, 0f
	ld1		{v3.16b}, [x20], #16
	mov		v28.16b, v3.16b
	tbnz		x6, #4, 0f
	ld1		{v4.16b}, [x20], #16
	mov		v29.16b, v4.16b
	tbnz		x6, #5, 0f
	ld1		{v5.16b}, [x20], #16
	mov		v30.16b, v5.16b
	tbnz		x6, #6, 0f
	ld1		{v6.16b}, [x20], #16
	mov		v31.16b, v6.16b
	tbnz		x6, #7, 0f
	ld1		{v7.16b}, [x20]

0:	mov		bskey, x21
	mov		rounds, x22
	bl		aesbs_decrypt8

	ld1		{v24.16b}, [x24]		// load IV

	eor		v1.16b, v1.16b, v25.16b
	eor		v6.16b, v6.16b, v26.16b
	eor		v4.16b, v4.16b, v27.16b
	eor		v2.16b, v2.16b, v28.16b
	eor		v7.16b, v7.16b, v29.16b
	eor		v0.16b, v0.16b, v24.16b
	eor		v3.16b, v3.16b, v30.16b
	eor		v5.16b, v5.16b, v31.16b

	st1		{v0.16b}, [x19], #16
	mov		v24.16b, v25.16b
	tbnz		x6, #1, 1f
	st1		{v1.16b}, [x19], #16
	mov		v24.16b, v26.16b
	tbnz		x6, #2, 1f
	st1		{v6.16b}, [x19], #16
	mov		v24.16b, v27.16b
	tbnz		x6, #3, 1f
70120ab6332SArd Biesheuvel st1 {v4.16b}, [x19], #16 7021abee99eSArd Biesheuvel mov v24.16b, v28.16b 7031abee99eSArd Biesheuvel tbnz x6, #4, 1f 70420ab6332SArd Biesheuvel st1 {v2.16b}, [x19], #16 7051abee99eSArd Biesheuvel mov v24.16b, v29.16b 7061abee99eSArd Biesheuvel tbnz x6, #5, 1f 70720ab6332SArd Biesheuvel st1 {v7.16b}, [x19], #16 7081abee99eSArd Biesheuvel mov v24.16b, v30.16b 7091abee99eSArd Biesheuvel tbnz x6, #6, 1f 71020ab6332SArd Biesheuvel st1 {v3.16b}, [x19], #16 7111abee99eSArd Biesheuvel mov v24.16b, v31.16b 7121abee99eSArd Biesheuvel tbnz x6, #7, 1f 71320ab6332SArd Biesheuvel ld1 {v24.16b}, [x20], #16 71420ab6332SArd Biesheuvel st1 {v5.16b}, [x19], #16 71520ab6332SArd Biesheuvel1: st1 {v24.16b}, [x24] // store IV 7161abee99eSArd Biesheuvel 71720ab6332SArd Biesheuvel cbz x23, 2f 71820ab6332SArd Biesheuvel cond_yield_neon 71920ab6332SArd Biesheuvel b 99b 7201abee99eSArd Biesheuvel 72120ab6332SArd Biesheuvel2: frame_pop 7221abee99eSArd Biesheuvel ret 7230e89640bSMark BrownSYM_FUNC_END(aesbs_cbc_decrypt) 7241abee99eSArd Biesheuvel 7251abee99eSArd Biesheuvel .macro next_tweak, out, in, const, tmp 7261abee99eSArd Biesheuvel sshr \tmp\().2d, \in\().2d, #63 7271abee99eSArd Biesheuvel and \tmp\().16b, \tmp\().16b, \const\().16b 7281abee99eSArd Biesheuvel add \out\().2d, \in\().2d, \in\().2d 7291abee99eSArd Biesheuvel ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 7301abee99eSArd Biesheuvel eor \out\().16b, \out\().16b, \tmp\().16b 7311abee99eSArd Biesheuvel .endm 7321abee99eSArd Biesheuvel 7331abee99eSArd Biesheuvel /* 7341abee99eSArd Biesheuvel * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 7351abee99eSArd Biesheuvel * int blocks, u8 iv[]) 7361abee99eSArd Biesheuvel * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, 7371abee99eSArd Biesheuvel * int blocks, u8 iv[]) 7381abee99eSArd Biesheuvel */ 7390e89640bSMark BrownSYM_FUNC_START_LOCAL(__xts_crypt8) 7401abee99eSArd Biesheuvel mov x6, #1 74120ab6332SArd 
Biesheuvel lsl x6, x6, x23 74220ab6332SArd Biesheuvel subs w23, w23, #8 74320ab6332SArd Biesheuvel csel x23, x23, xzr, pl 7441abee99eSArd Biesheuvel csel x6, x6, xzr, mi 7451abee99eSArd Biesheuvel 74620ab6332SArd Biesheuvel ld1 {v0.16b}, [x20], #16 7471abee99eSArd Biesheuvel next_tweak v26, v25, v30, v31 7481abee99eSArd Biesheuvel eor v0.16b, v0.16b, v25.16b 7491abee99eSArd Biesheuvel tbnz x6, #1, 0f 7501abee99eSArd Biesheuvel 75120ab6332SArd Biesheuvel ld1 {v1.16b}, [x20], #16 7521abee99eSArd Biesheuvel next_tweak v27, v26, v30, v31 7531abee99eSArd Biesheuvel eor v1.16b, v1.16b, v26.16b 7541abee99eSArd Biesheuvel tbnz x6, #2, 0f 7551abee99eSArd Biesheuvel 75620ab6332SArd Biesheuvel ld1 {v2.16b}, [x20], #16 7571abee99eSArd Biesheuvel next_tweak v28, v27, v30, v31 7581abee99eSArd Biesheuvel eor v2.16b, v2.16b, v27.16b 7591abee99eSArd Biesheuvel tbnz x6, #3, 0f 7601abee99eSArd Biesheuvel 76120ab6332SArd Biesheuvel ld1 {v3.16b}, [x20], #16 7621abee99eSArd Biesheuvel next_tweak v29, v28, v30, v31 7631abee99eSArd Biesheuvel eor v3.16b, v3.16b, v28.16b 7641abee99eSArd Biesheuvel tbnz x6, #4, 0f 7651abee99eSArd Biesheuvel 76620ab6332SArd Biesheuvel ld1 {v4.16b}, [x20], #16 76720ab6332SArd Biesheuvel str q29, [sp, #.Lframe_local_offset] 7681abee99eSArd Biesheuvel eor v4.16b, v4.16b, v29.16b 7691abee99eSArd Biesheuvel next_tweak v29, v29, v30, v31 7701abee99eSArd Biesheuvel tbnz x6, #5, 0f 7711abee99eSArd Biesheuvel 77220ab6332SArd Biesheuvel ld1 {v5.16b}, [x20], #16 77320ab6332SArd Biesheuvel str q29, [sp, #.Lframe_local_offset + 16] 7741abee99eSArd Biesheuvel eor v5.16b, v5.16b, v29.16b 7751abee99eSArd Biesheuvel next_tweak v29, v29, v30, v31 7761abee99eSArd Biesheuvel tbnz x6, #6, 0f 7771abee99eSArd Biesheuvel 77820ab6332SArd Biesheuvel ld1 {v6.16b}, [x20], #16 77920ab6332SArd Biesheuvel str q29, [sp, #.Lframe_local_offset + 32] 7801abee99eSArd Biesheuvel eor v6.16b, v6.16b, v29.16b 7811abee99eSArd Biesheuvel next_tweak v29, v29, v30, v31 7821abee99eSArd Biesheuvel tbnz 
x6, #7, 0f 7831abee99eSArd Biesheuvel 78420ab6332SArd Biesheuvel ld1 {v7.16b}, [x20], #16 78520ab6332SArd Biesheuvel str q29, [sp, #.Lframe_local_offset + 48] 7861abee99eSArd Biesheuvel eor v7.16b, v7.16b, v29.16b 7871abee99eSArd Biesheuvel next_tweak v29, v29, v30, v31 7881abee99eSArd Biesheuvel 78920ab6332SArd Biesheuvel0: mov bskey, x21 79020ab6332SArd Biesheuvel mov rounds, x22 791*39e4716cSJeremy Linton br x16 7920e89640bSMark BrownSYM_FUNC_END(__xts_crypt8) 7931abee99eSArd Biesheuvel 7941abee99eSArd Biesheuvel .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 79520ab6332SArd Biesheuvel frame_push 6, 64 7961abee99eSArd Biesheuvel 79720ab6332SArd Biesheuvel mov x19, x0 79820ab6332SArd Biesheuvel mov x20, x1 79920ab6332SArd Biesheuvel mov x21, x2 80020ab6332SArd Biesheuvel mov x22, x3 80120ab6332SArd Biesheuvel mov x23, x4 80220ab6332SArd Biesheuvel mov x24, x5 80320ab6332SArd Biesheuvel 8047a3b1c6eSArd Biesheuvel0: movi v30.2s, #0x1 8057a3b1c6eSArd Biesheuvel movi v25.2s, #0x87 8067a3b1c6eSArd Biesheuvel uzp1 v30.4s, v30.4s, v25.4s 80720ab6332SArd Biesheuvel ld1 {v25.16b}, [x24] 8081abee99eSArd Biesheuvel 809*39e4716cSJeremy Linton99: adr x16, \do8 8101abee99eSArd Biesheuvel bl __xts_crypt8 8111abee99eSArd Biesheuvel 81220ab6332SArd Biesheuvel ldp q16, q17, [sp, #.Lframe_local_offset] 81320ab6332SArd Biesheuvel ldp q18, q19, [sp, #.Lframe_local_offset + 32] 8141abee99eSArd Biesheuvel 8151abee99eSArd Biesheuvel eor \o0\().16b, \o0\().16b, v25.16b 8161abee99eSArd Biesheuvel eor \o1\().16b, \o1\().16b, v26.16b 8171abee99eSArd Biesheuvel eor \o2\().16b, \o2\().16b, v27.16b 8181abee99eSArd Biesheuvel eor \o3\().16b, \o3\().16b, v28.16b 8191abee99eSArd Biesheuvel 82020ab6332SArd Biesheuvel st1 {\o0\().16b}, [x19], #16 8211abee99eSArd Biesheuvel mov v25.16b, v26.16b 8221abee99eSArd Biesheuvel tbnz x6, #1, 1f 82320ab6332SArd Biesheuvel st1 {\o1\().16b}, [x19], #16 8241abee99eSArd Biesheuvel mov v25.16b, v27.16b 8251abee99eSArd Biesheuvel tbnz x6, #2, 1f 
82620ab6332SArd Biesheuvel st1 {\o2\().16b}, [x19], #16 8271abee99eSArd Biesheuvel mov v25.16b, v28.16b 8281abee99eSArd Biesheuvel tbnz x6, #3, 1f 82920ab6332SArd Biesheuvel st1 {\o3\().16b}, [x19], #16 8301abee99eSArd Biesheuvel mov v25.16b, v29.16b 8311abee99eSArd Biesheuvel tbnz x6, #4, 1f 8321abee99eSArd Biesheuvel 8331abee99eSArd Biesheuvel eor \o4\().16b, \o4\().16b, v16.16b 8341abee99eSArd Biesheuvel eor \o5\().16b, \o5\().16b, v17.16b 8351abee99eSArd Biesheuvel eor \o6\().16b, \o6\().16b, v18.16b 8361abee99eSArd Biesheuvel eor \o7\().16b, \o7\().16b, v19.16b 8371abee99eSArd Biesheuvel 83820ab6332SArd Biesheuvel st1 {\o4\().16b}, [x19], #16 8391abee99eSArd Biesheuvel tbnz x6, #5, 1f 84020ab6332SArd Biesheuvel st1 {\o5\().16b}, [x19], #16 8411abee99eSArd Biesheuvel tbnz x6, #6, 1f 84220ab6332SArd Biesheuvel st1 {\o6\().16b}, [x19], #16 8431abee99eSArd Biesheuvel tbnz x6, #7, 1f 84420ab6332SArd Biesheuvel st1 {\o7\().16b}, [x19], #16 8451abee99eSArd Biesheuvel 84620ab6332SArd Biesheuvel cbz x23, 1f 84720ab6332SArd Biesheuvel st1 {v25.16b}, [x24] 8481abee99eSArd Biesheuvel 84920ab6332SArd Biesheuvel cond_yield_neon 0b 85020ab6332SArd Biesheuvel b 99b 85120ab6332SArd Biesheuvel 85220ab6332SArd Biesheuvel1: st1 {v25.16b}, [x24] 85320ab6332SArd Biesheuvel frame_pop 8541abee99eSArd Biesheuvel ret 8551abee99eSArd Biesheuvel .endm 8561abee99eSArd Biesheuvel 8570e89640bSMark BrownSYM_FUNC_START(aesbs_xts_encrypt) 8581abee99eSArd Biesheuvel __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5 8590e89640bSMark BrownSYM_FUNC_END(aesbs_xts_encrypt) 8601abee99eSArd Biesheuvel 8610e89640bSMark BrownSYM_FUNC_START(aesbs_xts_decrypt) 8621abee99eSArd Biesheuvel __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5 8630e89640bSMark BrownSYM_FUNC_END(aesbs_xts_decrypt) 8641abee99eSArd Biesheuvel 8651abee99eSArd Biesheuvel .macro next_ctr, v 8661abee99eSArd Biesheuvel mov \v\().d[1], x8 8671abee99eSArd Biesheuvel adds x8, x8, #1 8681abee99eSArd Biesheuvel mov 
\v\().d[0], x7 8691abee99eSArd Biesheuvel adc x7, x7, xzr 8701abee99eSArd Biesheuvel rev64 \v\().16b, \v\().16b 8711abee99eSArd Biesheuvel .endm 8721abee99eSArd Biesheuvel 8731abee99eSArd Biesheuvel /* 8741abee99eSArd Biesheuvel * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], 87588a3f582SArd Biesheuvel * int rounds, int blocks, u8 iv[], u8 final[]) 8761abee99eSArd Biesheuvel */ 8770e89640bSMark BrownSYM_FUNC_START(aesbs_ctr_encrypt) 87820ab6332SArd Biesheuvel frame_push 8 8791abee99eSArd Biesheuvel 88020ab6332SArd Biesheuvel mov x19, x0 88120ab6332SArd Biesheuvel mov x20, x1 88220ab6332SArd Biesheuvel mov x21, x2 88320ab6332SArd Biesheuvel mov x22, x3 88420ab6332SArd Biesheuvel mov x23, x4 88520ab6332SArd Biesheuvel mov x24, x5 88620ab6332SArd Biesheuvel mov x25, x6 8871abee99eSArd Biesheuvel 88820ab6332SArd Biesheuvel cmp x25, #0 88920ab6332SArd Biesheuvel cset x26, ne 89020ab6332SArd Biesheuvel add x23, x23, x26 // do one extra block if final 89120ab6332SArd Biesheuvel 89220ab6332SArd Biesheuvel98: ldp x7, x8, [x24] 89320ab6332SArd Biesheuvel ld1 {v0.16b}, [x24] 8941abee99eSArd BiesheuvelCPU_LE( rev x7, x7 ) 8951abee99eSArd BiesheuvelCPU_LE( rev x8, x8 ) 8961abee99eSArd Biesheuvel adds x8, x8, #1 8971abee99eSArd Biesheuvel adc x7, x7, xzr 8981abee99eSArd Biesheuvel 8991abee99eSArd Biesheuvel99: mov x9, #1 90020ab6332SArd Biesheuvel lsl x9, x9, x23 90120ab6332SArd Biesheuvel subs w23, w23, #8 90220ab6332SArd Biesheuvel csel x23, x23, xzr, pl 9031abee99eSArd Biesheuvel csel x9, x9, xzr, le 9041abee99eSArd Biesheuvel 90588a3f582SArd Biesheuvel tbnz x9, #1, 0f 9061abee99eSArd Biesheuvel next_ctr v1 90788a3f582SArd Biesheuvel tbnz x9, #2, 0f 9081abee99eSArd Biesheuvel next_ctr v2 90988a3f582SArd Biesheuvel tbnz x9, #3, 0f 9101abee99eSArd Biesheuvel next_ctr v3 91188a3f582SArd Biesheuvel tbnz x9, #4, 0f 9121abee99eSArd Biesheuvel next_ctr v4 91388a3f582SArd Biesheuvel tbnz x9, #5, 0f 9141abee99eSArd Biesheuvel next_ctr v5 91588a3f582SArd Biesheuvel tbnz x9, 
#6, 0f 9161abee99eSArd Biesheuvel next_ctr v6 91788a3f582SArd Biesheuvel tbnz x9, #7, 0f 9181abee99eSArd Biesheuvel next_ctr v7 9191abee99eSArd Biesheuvel 92020ab6332SArd Biesheuvel0: mov bskey, x21 92120ab6332SArd Biesheuvel mov rounds, x22 9221abee99eSArd Biesheuvel bl aesbs_encrypt8 9231abee99eSArd Biesheuvel 92420ab6332SArd Biesheuvel lsr x9, x9, x26 // disregard the extra block 9251abee99eSArd Biesheuvel tbnz x9, #0, 0f 9261abee99eSArd Biesheuvel 92720ab6332SArd Biesheuvel ld1 {v8.16b}, [x20], #16 9281abee99eSArd Biesheuvel eor v0.16b, v0.16b, v8.16b 92920ab6332SArd Biesheuvel st1 {v0.16b}, [x19], #16 9301abee99eSArd Biesheuvel tbnz x9, #1, 1f 9311abee99eSArd Biesheuvel 93220ab6332SArd Biesheuvel ld1 {v9.16b}, [x20], #16 9331abee99eSArd Biesheuvel eor v1.16b, v1.16b, v9.16b 93420ab6332SArd Biesheuvel st1 {v1.16b}, [x19], #16 9351abee99eSArd Biesheuvel tbnz x9, #2, 2f 9361abee99eSArd Biesheuvel 93720ab6332SArd Biesheuvel ld1 {v10.16b}, [x20], #16 9381abee99eSArd Biesheuvel eor v4.16b, v4.16b, v10.16b 93920ab6332SArd Biesheuvel st1 {v4.16b}, [x19], #16 9401abee99eSArd Biesheuvel tbnz x9, #3, 3f 9411abee99eSArd Biesheuvel 94220ab6332SArd Biesheuvel ld1 {v11.16b}, [x20], #16 9431abee99eSArd Biesheuvel eor v6.16b, v6.16b, v11.16b 94420ab6332SArd Biesheuvel st1 {v6.16b}, [x19], #16 9451abee99eSArd Biesheuvel tbnz x9, #4, 4f 9461abee99eSArd Biesheuvel 94720ab6332SArd Biesheuvel ld1 {v12.16b}, [x20], #16 9481abee99eSArd Biesheuvel eor v3.16b, v3.16b, v12.16b 94920ab6332SArd Biesheuvel st1 {v3.16b}, [x19], #16 9501abee99eSArd Biesheuvel tbnz x9, #5, 5f 9511abee99eSArd Biesheuvel 95220ab6332SArd Biesheuvel ld1 {v13.16b}, [x20], #16 9531abee99eSArd Biesheuvel eor v7.16b, v7.16b, v13.16b 95420ab6332SArd Biesheuvel st1 {v7.16b}, [x19], #16 9551abee99eSArd Biesheuvel tbnz x9, #6, 6f 9561abee99eSArd Biesheuvel 95720ab6332SArd Biesheuvel ld1 {v14.16b}, [x20], #16 9581abee99eSArd Biesheuvel eor v2.16b, v2.16b, v14.16b 95920ab6332SArd Biesheuvel st1 {v2.16b}, [x19], #16 
9601abee99eSArd Biesheuvel tbnz x9, #7, 7f 9611abee99eSArd Biesheuvel 96220ab6332SArd Biesheuvel ld1 {v15.16b}, [x20], #16 9631abee99eSArd Biesheuvel eor v5.16b, v5.16b, v15.16b 96420ab6332SArd Biesheuvel st1 {v5.16b}, [x19], #16 9651abee99eSArd Biesheuvel 96688a3f582SArd Biesheuvel8: next_ctr v0 96720ab6332SArd Biesheuvel st1 {v0.16b}, [x24] 96812455e32SEric Biggers cbz x23, .Lctr_done 9691abee99eSArd Biesheuvel 97020ab6332SArd Biesheuvel cond_yield_neon 98b 97120ab6332SArd Biesheuvel b 99b 97220ab6332SArd Biesheuvel 97312455e32SEric Biggers.Lctr_done: 97412455e32SEric Biggers frame_pop 9751abee99eSArd Biesheuvel ret 9761abee99eSArd Biesheuvel 9771abee99eSArd Biesheuvel /* 97888a3f582SArd Biesheuvel * If we are handling the tail of the input (x6 != NULL), return the 97988a3f582SArd Biesheuvel * final keystream block back to the caller. 9801abee99eSArd Biesheuvel */ 98112455e32SEric Biggers0: cbz x25, 8b 98212455e32SEric Biggers st1 {v0.16b}, [x25] 98312455e32SEric Biggers b 8b 98420ab6332SArd Biesheuvel1: cbz x25, 8b 98520ab6332SArd Biesheuvel st1 {v1.16b}, [x25] 9861abee99eSArd Biesheuvel b 8b 98720ab6332SArd Biesheuvel2: cbz x25, 8b 98820ab6332SArd Biesheuvel st1 {v4.16b}, [x25] 9891abee99eSArd Biesheuvel b 8b 99020ab6332SArd Biesheuvel3: cbz x25, 8b 99120ab6332SArd Biesheuvel st1 {v6.16b}, [x25] 9921abee99eSArd Biesheuvel b 8b 99320ab6332SArd Biesheuvel4: cbz x25, 8b 99420ab6332SArd Biesheuvel st1 {v3.16b}, [x25] 9951abee99eSArd Biesheuvel b 8b 99620ab6332SArd Biesheuvel5: cbz x25, 8b 99720ab6332SArd Biesheuvel st1 {v7.16b}, [x25] 9981abee99eSArd Biesheuvel b 8b 99920ab6332SArd Biesheuvel6: cbz x25, 8b 100020ab6332SArd Biesheuvel st1 {v2.16b}, [x25] 10011abee99eSArd Biesheuvel b 8b 100220ab6332SArd Biesheuvel7: cbz x25, 8b 100320ab6332SArd Biesheuvel st1 {v5.16b}, [x25] 10041abee99eSArd Biesheuvel b 8b 10050e89640bSMark BrownSYM_FUNC_END(aesbs_ctr_encrypt) 1006