/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		5

	rk		.req	r0
	rounds		.req	r1
	in		.req	r2
	out		.req	r3
	ttab		.req	ip

	t0		.req	lr
	t1		.req	r2
	t2		.req	r3

	.macro		__select, out, in, idx
	.if		__LINUX_ARM_ARCH__ < 7
	and		\out, \in, #0xff << (8 * \idx)
	.else
	ubfx		\out, \in, #(8 * \idx), #8
	.endif
	.endm

	.macro		__load, out, in, idx, sz, op
	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
	.else
	ldr\op		\out, [ttab, \in, lsl #\sz]
	.endif
	.endm

	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
	__select	\out0, \in0, 0
	__select	t0, \in1, 1
	__load		\out0, \out0, 0, \sz, \op
	__load		t0, t0, 1, \sz, \op

	.if		\enc
	__select	\out1, \in1, 0
	__select	t1, \in2, 1
	.else
	__select	\out1, \in3, 0
	__select	t1, \in0, 1
	.endif
	__load		\out1, \out1, 0, \sz, \op
	__select	t2, \in2, 2
	__load		t1, t1, 1, \sz, \op
	__load		t2, t2, 2, \sz, \op

	eor		\out0, \out0, t0, ror #24

	__select	t0, \in3, 3
	.if		\enc
	__select	\t3, \in3, 2
	__select	\t4, \in0, 3
	.else
	__select	\t3, \in1, 2
	__select	\t4, \in2, 3
	.endif
	__load		\t3, \t3, 2, \sz, \op
	__load		t0, t0, 3, \sz, \op
	__load		\t4, \t4, 3, \sz, \op

	.ifnb		\oldcpsr
	/*
	 * This is the final round and we're done with all data-dependent table
	 * lookups, so we can safely re-enable interrupts.
	 */
	restore_irqs	\oldcpsr
	.endif

	eor		\out1, \out1, t1, ror #24
	eor		\out0, \out0, t2, ror #16
	ldm		rk!, {t1, t2}
	eor		\out1, \out1, \t3, ror #16
	eor		\out0, \out0, t0, ror #8
	eor		\out1, \out1, \t4, ror #8
	eor		\out0, \out0, t1
	eor		\out1, \out1, t2
	.endm

	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
	.endm

	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
	.endm

	.macro		__rev, out, in
	.if		__LINUX_ARM_ARCH__ < 6
	lsl		t0, \in, #24
	and		t1, \in, #0xff00
	and		t2, \in, #0xff0000
	orr		\out, t0, \in, lsr #24
	orr		\out, \out, t1, lsl #8
	orr		\out, \out, t2, lsr #8
	.else
	rev		\out, \in
	.endif
	.endm

	.macro		__adrl, out, sym, c
	.if		__LINUX_ARM_ARCH__ < 7
	ldr\c		\out, =\sym
	.else
	movw\c		\out, #:lower16:\sym
	movt\c		\out, #:upper16:\sym
	.endif
	.endm
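	/*
	 * For reference: with the default word-sized table (sz=2), each
	 * output word computed by __hround in the encryption direction is
	 * the classic single-table T-table round step.  A rough C sketch
	 * (names hypothetical; T is the 256 x u32 table at ttab, and
	 * b0..b3 extract bytes 0..3 of a word):
	 *
	 *	out0 = T[b0(in0)] ^ ror32(T[b1(in1)], 24) ^
	 *	       ror32(T[b2(in2)], 16) ^ ror32(T[b3(in3)], 8) ^ *rk++;
	 *
	 * The rotations stand in for the four pre-rotated tables used by
	 * typical 4 KiB T-table implementations, so a single 1 KiB table
	 * suffices.  The decryption direction differs only in which input
	 * words feed which byte positions.
	 */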
	.macro		do_crypt, round, ttab, ltab, bsz
	push		{r3-r11, lr}

	// Load keys first, to reduce latency in case they're not cached yet.
	ldm		rk!, {r8-r11}

	ldr		r4, [in]
	ldr		r5, [in, #4]
	ldr		r6, [in, #8]
	ldr		r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	eor		r4, r4, r8
	eor		r5, r5, r9
	eor		r6, r6, r10
	eor		r7, r7, r11

	__adrl		ttab, \ttab
	/*
	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
	 * intended to make cache-timing attacks more difficult.  They may not
	 * be fully prevented, however; see the paper
	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
	 * ("Cache-timing attacks on AES") for a discussion of the many
	 * difficulties involved in writing truly constant-time AES software.
	 */
	save_and_disable_irqs	t0
	.set		i, 0
	.rept		1024 / 128
	ldr		r8, [ttab, #i + 0]
	ldr		r9, [ttab, #i + 32]
	ldr		r10, [ttab, #i + 64]
	ldr		r11, [ttab, #i + 96]
	.set		i, i + 128
	.endr
	push		{t0}		// oldcpsr

	tst		rounds, #2
	bne		1f

0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
	\round		r4, r5, r6, r7, r8, r9, r10, r11

1:	subs		rounds, rounds, #4
	\round		r8, r9, r10, r11, r4, r5, r6, r7
	bls		2f
	\round		r4, r5, r6, r7, r8, r9, r10, r11
	b		0b

2:	.ifb		\ltab
	add		ttab, ttab, #1
	.else
	__adrl		ttab, \ltab
	// Prefetch inverse S-box for final round; see explanation above
	.set		i, 0
	.rept		256 / 64
	ldr		t0, [ttab, #i + 0]
	ldr		t1, [ttab, #i + 32]
	.set		i, i + 64
	.endr
	.endif

	pop		{rounds}	// oldcpsr
	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
	__rev		r4, r4
	__rev		r5, r5
	__rev		r6, r6
	__rev		r7, r7
#endif

	ldr		out, [sp]

	str		r4, [out]
	str		r5, [out, #4]
	str		r6, [out, #8]
	str		r7, [out, #12]

	pop		{r3-r11, pc}

	.align		3
	.ltorg
	.endm

ENTRY(__aes_arm_encrypt)
	do_crypt	fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

	.align		5
ENTRY(__aes_arm_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
ENDPROC(__aes_arm_decrypt)

	.section	".rodata", "a"
	.align		L1_CACHE_SHIFT
	.type		__aes_arm_inverse_sbox, %object
__aes_arm_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
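/*
 * A minimal sketch (an assumption for reference, not taken from this file)
 * of how C glue code might declare the entry points above, matching the
 * AAPCS register assignments rk=r0, rounds=r1, in=r2, out=r3:
 *
 *	asmlinkage void __aes_arm_encrypt(u32 rk[], int rounds,
 *					  const u8 *in, u8 *out);
 *	asmlinkage void __aes_arm_decrypt(u32 rk[], int rounds,
 *					  const u8 *in, u8 *out);
 *
 * rounds is the full AES round count (10/12/14 for 128/192/256-bit keys);
 * the loop in do_crypt performs rounds - 1 table-driven rounds, and the
 * ltab path handles the final round.
 */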