/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif

SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

#if MAX_STRIDE == 5
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)
#endif

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */

AES_FUNC_START(aes_ecb_encrypt)
	frame_push	0

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	frame_pop
	ret
AES_FUNC_END(aes_ecb_encrypt)
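
	/*
	 * Illustrative only: the C glue code is expected to drive these
	 * helpers with the NEON unit claimed, roughly as sketched below.
	 * The 'ctx' object, its 'key_enc' field and the 'rounds'/'blocks'
	 * values are hypothetical names for this sketch, not taken from
	 * this file:
	 *
	 *	kernel_neon_begin();
	 *	aes_ecb_encrypt(dst, src, (u8 *)ctx->key_enc, rounds, blocks);
	 *	kernel_neon_end();
	 */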

AES_FUNC_START(aes_ecb_decrypt)
	frame_push	0

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	frame_pop
	ret
AES_FUNC_END(aes_ecb_decrypt)


	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */

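	/*
	 * Illustrative note on ESSIV: before running ordinary CBC, the
	 * sector IV is itself encrypted with the second key schedule rk2,
	 * i.e. informally
	 *
	 *	IV' = AES-256(rk2, IV)
	 *
	 * which is why the round count for that step is hard-coded to 14
	 * below: rk2 is assumed to be derived from a 256-bit digest of the
	 * data key by the C glue code (not shown in this file).
	 */
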
AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6
	b		.Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9
	b		.Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	frame_push	0
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	frame_pop
	ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)


	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */

AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_FUNC_END(aes_cbc_cts_decrypt)
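
	/*
	 * Illustrative walk-through (encrypt side), assuming a 20-byte
	 * message, i.e. x4 == 4 after the 'sub' above:
	 *
	 *  - v3 is loaded from .Lcts_permute_table + 4 and v4 from
	 *    .Lcts_permute_table + 28, so v4 = { 12, 13, 14, 15, 0xff.. }
	 *    and v3 = { 0xff x 12, 0, 1, 2, 3 }; tbl maps index 0xff to a
	 *    zero byte.
	 *  - the two overlapping loads fetch bytes 0-15 into v0 and bytes
	 *    4-19 into v1; 'tbl v1, {v1}, v4' then moves the 4 tail bytes
	 *    to the front of v1 and zero-pads the rest.
	 *  - after the two encryptions, 'tbl v0, {v0}, v3' moves the first
	 *    4 bytes of the penultimate ciphertext block to the end of v0,
	 *    so the overlapping stores leave the swapped, truncated block
	 *    layout that ciphertext stealing requires.
	 */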

	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous

	/*
	 * This macro generates the code for CTR and XCTR mode.
	 */
.macro ctr_encrypt xctr
	// Arguments
	OUT		.req x0
	IN		.req x1
	KEY		.req x2
	ROUNDS_W	.req w3
	BYTES_W		.req w4
	IV		.req x5
	BYTE_CTR_W	.req w6		// XCTR only
	// Intermediate values
	CTR_W		.req w11	// XCTR only
	CTR		.req x11	// XCTR only
	IV_PART		.req x12
	BLOCKS		.req x13
	BLOCKS_W	.req w13

	frame_push	0

	enc_prepare	ROUNDS_W, KEY, IV_PART
	ld1		{vctr.16b}, [IV]

	/*
	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
	 * the 64-bit counter with the IV.
	 */
	.if \xctr
		umov	IV_PART, vctr.d[0]
		lsr	CTR_W, BYTE_CTR_W, #4
	.else
		umov	IV_PART, vctr.d[1]
		rev	IV_PART, IV_PART
	.endif

.LctrloopNx\xctr:
	add		BLOCKS_W, BYTES_W, #15
	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
	lsr		BLOCKS_W, BLOCKS_W, #4
	mov		w8, #MAX_STRIDE
	cmp		BLOCKS_W, w8
	csel		BLOCKS_W, BLOCKS_W, w8, lt

	/*
	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
	 *
	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
	 * handling code expects the last keystream block to be in
	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
	 */
	.if \xctr
		add	CTR, CTR, BLOCKS
	.else
		adds	IV_PART, IV_PART, BLOCKS
	.endif
	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b		)
	.if \xctr
		sub	x6, CTR, #MAX_STRIDE - 1
		sub	x7, CTR, #MAX_STRIDE - 2
		sub	x8, CTR, #MAX_STRIDE - 3
		sub	x9, CTR, #MAX_STRIDE - 4
ST5(		sub	x10, CTR, #MAX_STRIDE - 5	)
		eor	x6, x6, IV_PART
		eor	x7, x7, IV_PART
		eor	x8, x8, IV_PART
		eor	x9, x9, IV_PART
ST5(		eor	x10, x10, IV_PART		)
		mov	v0.d[0], x6
		mov	v1.d[0], x7
		mov	v2.d[0], x8
		mov	v3.d[0], x9
ST5(		mov	v4.d[0], x10			)
	.else
		bcs	0f
		.subsection	1
		/*
		 * This subsection handles carries.
		 *
		 * Conditional branching here is allowed with respect to time
		 * invariance since the branches are dependent on the IV instead
		 * of the plaintext or key.  This code is rarely executed in
		 * practice anyway.
		 */

		/* Apply carry to outgoing counter. */
0:		umov	x8, vctr.d[0]
		rev	x8, x8
		add	x8, x8, #1
		rev	x8, x8
		ins	vctr.d[0], x8

		/*
		 * Apply carry to counter blocks if needed.
		 *
		 * Since the carry flag was set, we know 0 <= IV_PART <
		 * MAX_STRIDE.  Using the value of IV_PART we can determine how
		 * many counter blocks need to be updated.
		 */
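		/*
		 * For example (illustrative): with MAX_STRIDE == 5 and
		 * IV_PART == 2 after the wrap, the computed branch below
		 * lands on the 'mov v3.d[0]' entry, so only the last two
		 * counter blocks (v3 and v4) pick up the just-incremented
		 * upper half; the earlier blocks did not cross the 2^64
		 * boundary and keep the old value.
		 */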
		cbz	IV_PART, 2f
		adr	x16, 1f
		sub	x16, x16, IV_PART, lsl #3
		br	x16
		bti	c
		mov	v0.d[0], vctr.d[0]
		bti	c
		mov	v1.d[0], vctr.d[0]
		bti	c
		mov	v2.d[0], vctr.d[0]
		bti	c
		mov	v3.d[0], vctr.d[0]
ST5(		bti	c				)
ST5(		mov	v4.d[0], vctr.d[0]		)
1:		b	2f
		.previous

2:		rev	x7, IV_PART
		ins	vctr.d[1], x7
		sub	x7, IV_PART, #MAX_STRIDE - 1
		sub	x8, IV_PART, #MAX_STRIDE - 2
		sub	x9, IV_PART, #MAX_STRIDE - 3
		rev	x7, x7
		rev	x8, x8
		mov	v1.d[1], x7
		rev	x9, x9
ST5(		sub	x10, IV_PART, #MAX_STRIDE - 4	)
		mov	v2.d[1], x8
ST5(		rev	x10, x10			)
		mov	v3.d[1], x9
ST5(		mov	v4.d[1], x10			)
	.endif

	/*
	 * If there are at least MAX_STRIDE blocks left, XOR the data with
	 * keystream and store.  Otherwise jump to tail handling.
	 */
	tbnz		BYTES_W, #31, .Lctrtail\xctr
	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [IN], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [OUT], #64
ST5(	st1		{v4.16b}, [OUT], #16		)
	cbz		BYTES_W, .Lctrout\xctr
	b		.LctrloopNx\xctr

.Lctrout\xctr:
	.if !\xctr
		st1	{vctr.16b}, [IV]	/* return next CTR value */
	.endif
	frame_pop
	ret

.Lctrtail\xctr:
	/*
	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
	 *
	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
	 * v4 should have the next two counter blocks.
	 *
	 * This allows us to store the ciphertext by writing to overlapping
	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
	 * correctly computed blocks.  This approach greatly simplifies the
	 * logic for storing the ciphertext.
	 */
	mov		x16, #16
	ands		w7, BYTES_W, #0xf
	csel		x13, x7, x16, ne

ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
ST5(	csel		x14, x16, xzr, gt		)
	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)

	adr_l		x9, .Lcts_permute_table
	add		x9, x9, x13
	ble		.Lctrtail1x\xctr

ST5(	ld1		{v5.16b}, [IN], x14		)
	ld1		{v6.16b}, [IN], x15
	ld1		{v7.16b}, [IN], x16

ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)

	ld1		{v8.16b}, [IN], x13
	ld1		{v9.16b}, [IN]
	ld1		{v10.16b}, [x9]

ST4(	eor		v6.16b, v6.16b, v0.16b		)
ST4(	eor		v7.16b, v7.16b, v1.16b		)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b		)
ST4(	eor		v9.16b, v9.16b, v3.16b		)

ST5(	eor		v5.16b, v5.16b, v0.16b		)
ST5(	eor		v6.16b, v6.16b, v1.16b		)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b		)
ST5(	eor		v8.16b, v8.16b, v3.16b		)
ST5(	eor		v9.16b, v9.16b, v4.16b		)

ST5(	st1		{v5.16b}, [OUT], x14		)
	st1		{v6.16b}, [OUT], x15
	st1		{v7.16b}, [OUT], x16
	add		x13, x13, OUT
	st1		{v9.16b}, [x13]			// overlapping stores
	st1		{v8.16b}, [OUT]
	b		.Lctrout\xctr

.Lctrtail1x\xctr:
	/*
	 * Handle <= 16 bytes of plaintext
	 *
	 * This code always reads and writes 16 bytes.  To avoid out of bounds
	 * accesses, XCTR and CTR modes must use a temporary buffer when
	 * encrypting/decrypting less than 16 bytes.
	 *
	 * This code is unusual in that it loads the input and stores the output
	 * relative to the end of the buffers rather than relative to the start.
	 * This causes unusual behaviour when encrypting/decrypting less than 16
	 * bytes; the end of the data is expected to be at the end of the
	 * temporary buffer rather than the start of the data being at the start
	 * of the temporary buffer.
	 */
	sub		x8, x7, #16
	csel		x7, x7, x8, eq
	add		IN, IN, x7
	add		OUT, OUT, x7
	ld1		{v5.16b}, [IN]
	ld1		{v6.16b}, [OUT]
ST5(	mov		v3.16b, v4.16b			)
	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
	ld1		{v10.16b-v11.16b}, [x9]
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b
	st1		{v5.16b}, [OUT]
	b		.Lctrout\xctr

	// Arguments
	.unreq OUT
	.unreq IN
	.unreq KEY
	.unreq ROUNDS_W
	.unreq BYTES_W
	.unreq IV
	.unreq BYTE_CTR_W	// XCTR only
	// Intermediate values
	.unreq CTR_W		// XCTR only
	.unreq CTR		// XCTR only
	.unreq IV_PART
	.unreq BLOCKS
	.unreq BLOCKS_W
.endm

	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[])
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */
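
	/*
	 * Illustrative only: a caller handling a sub-block request would be
	 * expected to bounce the data through a 16-byte buffer, placing it at
	 * the end of that buffer as described above, e.g. (variable names are
	 * hypothetical, not taken from this file):
	 *
	 *	u8 buf[AES_BLOCK_SIZE];
	 *
	 *	if (nbytes < AES_BLOCK_SIZE)
	 *		src = dst = memcpy(buf + sizeof(buf) - nbytes,
	 *				   src, nbytes);
	 */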

AES_FUNC_START(aes_ctr_encrypt)
	ctr_encrypt	0
AES_FUNC_END(aes_ctr_encrypt)

	/*
	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		    int bytes, u8 const iv[], int byte_ctr)
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */

AES_FUNC_START(aes_xctr_encrypt)
	ctr_encrypt	1
AES_FUNC_END(aes_xctr_encrypt)


	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */

	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

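	/*
	 * Illustrative note: next_tweak multiplies the 128-bit tweak by x in
	 * GF(2^128) with the XTS polynomial, i.e. viewed little-endian
	 *
	 *	T' = (T << 1) ^ (0x87 if bit 127 of T was set, else 0)
	 *
	 * The two 64-bit halves are doubled independently; the sign-extended
	 * top bits, ANDed with xtsmask = { 0x1, 0x87 } and rotated by 8 bytes,
	 * then supply both the carry from the low half into the high half and
	 * the 0x87 reduction term.
	 */
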
AES_FUNC_START(aes_xts_encrypt)
	frame_push	0

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #64
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencret
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #64
	beq		.Lxtsencout
	subs		w4, w4, #16
	bmi		.LxtsencctsNx
.Lxtsencloop:
	ld1		{v0.16b}, [x1], #16
.Lxtsencctsout:
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	cbz		w4, .Lxtsencout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	bmi		.Lxtsenccts
	st1		{v0.16b}, [x0], #16
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v0.16b}, [x0]
.Lxtsencret:
	st1		{v4.16b}, [x6]
	frame_pop
	ret

.LxtsencctsNx:
	mov		v0.16b, v3.16b
	sub		x0, x0, #16
.Lxtsenccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
	frame_push	0

	/* subtract 16 bytes if we are doing CTS */
	sub		w8, w4, #0x10
	tst		w4, #0xf
	csel		w4, w4, w8, eq

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
.Lxtsdecskiptw:
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #64
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #64
	beq		.Lxtsdecout
	subs		w4, w4, #16
.Lxtsdecloop:
	ld1		{v0.16b}, [x1], #16
	bmi		.Lxtsdeccts
.Lxtsdecctsout:
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	cbz		w4, .Lxtsdecout
	subs		w4, w4, #16
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	frame_pop
	ret

.Lxtsdeccts:
	adr_l		x8, .Lcts_permute_table

	add		x1, x1, w4, sxtw	/* rewind input pointer */
	add		w4, w4, #16		/* # bytes in final block */
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	add		x4, x0, x4		/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]		/* load final block */
	ld1		{v2.16b}, [x8]
	ld1		{v3.16b}, [x9]

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]		/* overlapping stores */
	mov		w4, wzr
	b		.Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)

#if IS_ENABLED(CONFIG_CRYPTO_LIB_AES_CBC_MACS)
	/*
	 * void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		       size_t blocks, u8 dg[], int enc_before,
	 *		       int enc_after);
	 */
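	/*
	 * Illustrative note: this is the CBC-MAC core, i.e. for each input
	 * block P_i the digest is updated as
	 *
	 *	dg = E_K(dg ^ P_i)
	 *
	 * except that the cipher call for the very last block is skipped when
	 * enc_after is zero, and an extra E_K(dg) is applied up front when
	 * enc_before is nonzero, so that callers can split a message across
	 * several invocations and treat the final block specially.
	 */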
AES_FUNC_START(aes_mac_update)
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		x3, x3, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		x3, xzr
	csinv		w5, w6, wzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	b		.Lmacloop4x
.Lmac1x:
	add		x3, x3, #4
.Lmacloop:
	cbz		x3, .Lmacout
	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		x3, x3, #1
	csinv		w5, w6, wzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w2, x1, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x4]			/* return dg */
	ret
AES_FUNC_END(aes_mac_update)
#endif /* CONFIG_CRYPTO_LIB_AES_CBC_MACS */