/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')

#include <linux/linkage.h>

.text
.option arch, +zvkned

#include "aes-macros.S"

#define KEYP		a0
#define INP		a1
#define OUTP		a2
#define LEN		a3
#define IVP		a4

.macro	__aes_crypt_zvkned	enc, keylen
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)
	ret
.endm

.macro	aes_crypt_zvkned	enc
	aes_begin	KEYP, 128f, 192f
	__aes_crypt_zvkned	\enc, 256
128:
	__aes_crypt_zvkned	\enc, 128
192:
	__aes_crypt_zvkned	\enc, 192
.endm

// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
SYM_FUNC_START(aes_encrypt_zvkned)
	aes_crypt_zvkned	1
SYM_FUNC_END(aes_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_decrypt_zvkned)
	aes_crypt_zvkned	0
SYM_FUNC_END(aes_decrypt_zvkned)

.macro	__aes_ecb_crypt	enc, keylen
	srli		t0, LEN, 2
	// t0 is the remaining length in 32-bit words.  It's a multiple of 4.
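	// Illustrative note on the stripmining loop below: vsetvli picks how
	// many 32-bit words (t1) fit in one vector register group with
	// LMUL=8.  With the minimum VLEN of 128 that is 32 words, i.e. 8 AES
	// blocks per iteration; larger VLEN values process proportionally
	// more blocks with no code changes.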
1:
	vsetvli		t1, t0, e32, m8, ta, ma
	sub		t0, t0, t1	// Subtract number of words processed
	slli		t1, t1, 2	// Words to bytes
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		t0, 1b

	ret
.endm

.macro	aes_ecb_crypt	enc
	aes_begin	KEYP, 128f, 192f
	__aes_ecb_crypt	\enc, 256
128:
	__aes_ecb_crypt	\enc, 128
192:
	__aes_ecb_crypt	\enc, 192
.endm

// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
	aes_ecb_crypt	1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
	aes_ecb_crypt	0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)

.macro	aes_cbc_encrypt	keylen
	vle32.v		v16, (IVP)	// Load IV
1:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		INP, INP, 16
	addi		OUTP, OUTP, 16
	addi		LEN, LEN, -16
	bnez		LEN, 1b

	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm

.macro	aes_cbc_decrypt	keylen
	srli		LEN, LEN, 2	// Convert LEN from bytes to words
	vle32.v		v16, (IVP)	// Load IV
1:
	vsetvli		t0, LEN, e32, m4, ta, ma
	vle32.v		v20, (INP)	// Load ciphertext blocks
	vslideup.vi	v16, v20, 4	// Setup prev ciphertext blocks
	addi		t1, t0, -4
	vslidedown.vx	v24, v20, t1	// Save last ciphertext block
	aes_decrypt	v20, \keylen	// Decrypt the blocks
	vxor.vv		v20, v20, v16	// XOR with prev ciphertext blocks
	vse32.v		v20, (OUTP)	// Store plaintext blocks
	vmv.v.v		v16, v24	// Next "IV" is last ciphertext block
	slli		t1, t0, 2	// Words to bytes
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	sub		LEN, LEN, t0
	bnez		LEN, 1b

	vsetivli	zero, 4, e32, m1, ta, ma
	vse32.v		v16, (IVP)	// Store next IV
	ret
.endm

// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_encrypt	256
128:
	aes_cbc_encrypt	128
192:
	aes_cbc_encrypt	192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_decrypt	256
128:
	aes_cbc_decrypt	128
192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)

.macro	aes_cbc_cts_encrypt	keylen

	// CBC-encrypt all blocks except the last.  But don't store the
	// second-to-last block to the output buffer yet, since it will be
	// handled specially in the ciphertext stealing step.  Exception: if
	// the message is single-block, still encrypt the last (and only)
	// block.
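	//
	// Illustrative example: a 33-byte message has full blocks P[1] and
	// P[2] plus a 1-byte final block P[3].  The loop below stores C[1]
	// and leaves Encrypt(P[2] ^ C[1]) in v16; the ciphertext stealing
	// code that follows then produces C[2] and the 1-byte C[3].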
	li		t0, 16
	j		2f
1:
	vse32.v		v16, (OUTP)	// Store ciphertext block
	addi		OUTP, OUTP, 16
2:
	vle32.v		v17, (INP)	// Load plaintext block
	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
	aes_encrypt	v16, \keylen	// Encrypt
	addi		INP, INP, 16
	addi		LEN, LEN, -16
	bgt		LEN, t0, 1b	// Repeat if more than one block remains

	// Special case: if the message is a single block, just do CBC.
	beqz		LEN, .Lcts_encrypt_done\@

	// Encrypt the last two blocks using ciphertext stealing as follows:
	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
	//	C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
	//
	// C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
	// plaintext block.  Block n, the last block, may be partial; its
	// length is 1 <= LEN <= 16.  If there are only 2 blocks, C[n-2] means
	// the IV.
	//
	// v16 already contains Encrypt(P[n-1] ^ C[n-2]).
	// INP points to P[n].  OUTP points to where C[n-1] should go.
	// To support in-place encryption, load P[n] before storing C[n].
	addi		t0, OUTP, 16	// Get pointer to where C[n] should go
	vsetvli		zero, LEN, e8, m1, tu, ma
	vle8.v		v17, (INP)	// Load P[n]
	vse8.v		v16, (t0)	// Store C[n]
	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_encrypt	v16, \keylen
.Lcts_encrypt_done\@:
	vse32.v		v16, (OUTP)	// Store C[n-1] (or C[n] in single-block case)
	ret
.endm

#define LEN32		t4	// Length of remaining full blocks in 32-bit words
#define LEN_MOD16	t5	// Length of message in bytes mod 16

.macro	aes_cbc_cts_decrypt	keylen
	andi		LEN32, LEN, ~15
	srli		LEN32, LEN32, 2
	andi		LEN_MOD16, LEN, 15

	// Save C[n-2] in v28 so that it's available later during the
	// ciphertext stealing step.  If there are fewer than three blocks,
	// C[n-2] means the IV, otherwise it means the third-to-last
	// ciphertext block.
	vmv.v.v		v28, v16	// IV
	add		t0, LEN, -33
	bltz		t0, .Lcts_decrypt_loop\@
	andi		t0, t0, ~15
	add		t0, t0, INP
	vle32.v		v28, (t0)

	// CBC-decrypt all full blocks.  For the last full block, or the last
	// 2 full blocks if the message is block-aligned, this doesn't write
	// the correct output blocks (unless the message is only a single
	// block), because it XORs the wrong values with the raw AES
	// plaintexts.  But we fix this after this loop without redoing the
	// AES decryptions.  This approach allows more of the AES decryptions
	// to be parallelized.
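	//
	// Register roles in the loop below: v16 holds the IV or the last
	// ciphertext block of the previous set, v20 the current set of
	// ciphertext blocks (and later their raw decryptions), v24 the
	// shifted "previous ciphertext" values, and v28 still holds C[n-2].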
.Lcts_decrypt_loop\@:
	vsetvli		t0, LEN32, e32, m4, ta, ma
	addi		t1, t0, -4
	vle32.v		v20, (INP)	// Load next set of ciphertext blocks
	vmv.v.v		v24, v16	// Get IV or last ciphertext block of prev set
	vslideup.vi	v24, v20, 4	// Setup prev ciphertext blocks
	vslidedown.vx	v16, v20, t1	// Save last ciphertext block of this set
	aes_decrypt	v20, \keylen	// Decrypt this set of blocks
	vxor.vv		v24, v24, v20	// XOR prev ciphertext blocks with decrypted blocks
	vse32.v		v24, (OUTP)	// Store this set of plaintext blocks
	sub		LEN32, LEN32, t0
	slli		t0, t0, 2	// Words to bytes
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	bnez		LEN32, .Lcts_decrypt_loop\@

	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	v20, v20, t1	// Extract raw plaintext of last full block
	addi		t0, OUTP, -16	// Get pointer to last full plaintext block
	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

	// Special case: if the message is a single block, just do CBC.
	li		t1, 16
	beq		LEN, t1, .Lcts_decrypt_done\@

	// Block-aligned message.  Just fix up the last 2 blocks.  We need:
	//
	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
	//	P[n] = Decrypt(C[n-1]) ^ C[n]
	//
	// We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
	// Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
	// is everything needed to fix the output without re-decrypting blocks.
	addi		t1, OUTP, -32	// Get pointer to where P[n-1] should go
	vxor.vv		v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
	vle32.v		v24, (t1)	// Decrypt(C[n-1]) ^ C[n-2]
	vse32.v		v20, (t1)	// Store P[n-1]
	vxor.vv		v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
	j		.Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
	// Decrypt the last two blocks using ciphertext stealing as follows:
	//
	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
	//
	// We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
	vmv.v.v		v16, v20	// v16 = Decrypt(C[n-1])
	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma
	vle8.v		v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
	vxor.vv		v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
	vse8.v		v16, (OUTP)	// Store P[n]
	vsetivli	zero, 4, e32, m1, ta, ma
	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
	vxor.vv		v20, v20, v28	// XOR with C[n-2]
	vse32.v		v20, (t0)	// Store last full plaintext block
.Lcts_decrypt_done\@:
	ret
.endm

.macro	aes_cbc_cts_crypt	keylen
	vle32.v		v16, (IVP)	// Load IV
	beqz		a5, .Lcts_decrypt\@
	aes_cbc_cts_encrypt	\keylen
.Lcts_decrypt\@:
	aes_cbc_cts_decrypt	\keylen
.endm

// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//				 const u8 *in, u8 *out, size_t len,
//				 const u8 iv[16], bool enc);
//
// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
// This is the variant that unconditionally swaps the last two blocks.
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
	aes_begin	KEYP, 128f, 192f
	aes_cbc_cts_crypt	256
128:
	aes_cbc_cts_crypt	128
192:
	aes_cbc_cts_crypt	192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
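
// Illustrative C-side usage sketch (an assumption about the glue code, not
// part of this file; variable names are hypothetical).  Callers pass a fully
// expanded key schedule in struct crypto_aes_ctx and bracket the call with
// kernel_vector_begin()/kernel_vector_end() so the vector unit is usable:
//
//	kernel_vector_begin();
//	aes_cbc_cts_crypt_zvkned(ctx, src, dst, len, iv, true);	// encrypt
//	kernel_vector_end();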