/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round, and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm
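
	/*
	 * For reference, one __hround in the encryption direction amounts to
	 * the following C-like sketch, derived from the eor/ror sequence
	 * above (b0..b3, ror32 and FT are illustrative names for the byte
	 * extract, 32-bit right-rotate and crypto_ft_tab, not symbols used
	 * elsewhere in this file):
	 *
	 *	out0 = FT[b0(in0)] ^ ror32(FT[b1(in1)], 24) ^
	 *	       ror32(FT[b2(in2)], 16) ^ ror32(FT[b3(in3)], 8) ^ *rk++;
	 *
	 * out1 is computed the same way from the input columns shifted by
	 * one, and iround applies the same structure to crypto_it_tab with
	 * the column order reversed for decryption.
	 */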

	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	mov_l		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	mov_l		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	rev_l		r4, t0
	rev_l		r5, t0
	rev_l		r6, t0
	rev_l		r7, t0
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)
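
/*
 * Per the register aliases at the top of this file (rk = r0, rounds = r1,
 * in = r2, out = r3) and the AAPCS argument-passing rules, the C prototypes
 * for these entry points would look like the following sketch:
 *
 *	asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds,
 *					  const u8 *in, u8 *out);
 *	asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds,
 *					  const u8 *in, u8 *out);
 *
 * rk points at the expanded key schedule, rounds is the total round count
 * (10/12/14 for AES-128/192/256), and in/out each refer to one 16-byte block.
 */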