/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text

	/*
	 * Arguments, per the AAPCS64 calling convention:
	 *   x0 - round key array
	 *   x1 - output block
	 *   x2 - input block
	 *   x3 - number of rounds
	 *
	 * tt deliberately aliases x2: the table pointer is only set up
	 * after the input block has been loaded.
	 */
	rk	.req	x0
	out	.req	x1
	in	.req	x2
	rounds	.req	x3
	tt	.req	x2

	/*
	 * __pair1 (encryption) and __pair0 (decryption) load a pair of
	 * table entries indexed by byte #\shift of \in0 and \in1e/\in1d.
	 * For the final encryption round, the S-box bytes sit at stride 4
	 * inside crypto_ft_tab, so byte indices must be scaled by 4.
	 */
	.macro		__pair1, sz, op, reg0, reg1, in0, in1e, in1d, shift
	.ifc		\op\shift, b0
	ubfiz		\reg0, \in0, #2, #8
	ubfiz		\reg1, \in1e, #2, #8
	.else
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1e, #\shift, #8
	.endif

	/*
	 * AArch64 cannot do byte size indexed loads from a table containing
	 * 32-bit quantities, i.e., 'ldrb w12, [tt, w12, uxtw #2]' is not a
	 * valid instruction. So perform the shift explicitly first for the
	 * high bytes (the low byte is shifted implicitly by using ubfiz
	 * rather than ubfx above).
	 */
	.ifnc		\op, b
	ldr		\reg0, [tt, \reg0, uxtw #2]
	ldr		\reg1, [tt, \reg1, uxtw #2]
	.else
	.if		\shift > 0
	lsl		\reg0, \reg0, #2
	lsl		\reg1, \reg1, #2
	.endif
	ldrb		\reg0, [tt, \reg0, uxtw]
	ldrb		\reg1, [tt, \reg1, uxtw]
	.endif
	.endm

	.macro		__pair0, sz, op, reg0, reg1, in0, in1e, in1d, shift
	ubfx		\reg0, \in0, #\shift, #8
	ubfx		\reg1, \in1d, #\shift, #8
	ldr\op		\reg0, [tt, \reg0, uxtw #\sz]
	ldr\op		\reg1, [tt, \reg1, uxtw #\sz]
	.endm

	/*
	 * __hround computes two output columns of one round: load two round
	 * key words, then xor in the four table lookups, each rotated into
	 * its column position.
	 */
	.macro		__hround, out0, out1, in0, in1, in2, in3, t0, t1, enc, sz, op
	ldp		\out0, \out1, [rk], #8

	__pair\enc	\sz, \op, w12, w13, \in0, \in1, \in3, 0
	__pair\enc	\sz, \op, w14, w15, \in1, \in2, \in0, 8
	__pair\enc	\sz, \op, w16, w17, \in2, \in3, \in1, 16
	__pair\enc	\sz, \op, \t0, \t1, \in3, \in0, \in2, 24

	eor		\out0, \out0, w12
	eor		\out1, \out1, w13
	eor		\out0, \out0, w14, ror #24
	eor		\out1, \out1, w15, ror #24
	eor		\out0, \out0, w16, ror #16
	eor		\out1, \out1, w17, ror #16
	eor		\out0, \out0, \t0, ror #8
	eor		\out1, \out1, \t1, ror #8
	.endm

	/* one full forward (encryption) round */
	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
	.endm

	/*
	 * one full inverse (decryption) round - the inputs are permuted
	 * differently to account for the reversed ShiftRows direction
	 */
	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
	.endm

	/*
	 * do_crypt: load one 16 byte block, xor in the first round key,
	 * apply \round repeatedly using the table at \ttab, and switch to
	 * the table at \ltab for the final round, which omits the
	 * [Inv]MixColumns step.
	 */
	.macro		do_crypt, round, ttab, ltab, bsz
	ldp		w4, w5, [in]
	ldp		w6, w7, [in, #8]
	ldp		w8, w9, [rk], #16
	ldp		w10, w11, [rk, #-8]

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	eor		w4, w4, w8
	eor		w5, w5, w9
	eor		w6, w6, w10
	eor		w7, w7, w11

	adr_l		tt, \ttab

	tbnz		rounds, #1, 1f

0:	\round		w8, w9, w10, w11, w4, w5, w6, w7
	\round		w4, w5, w6, w7, w8, w9, w10, w11

1:	subs		rounds, rounds, #4
	\round		w8, w9, w10, w11, w4, w5, w6, w7
	b.ls		3f
2:	\round		w4, w5, w6, w7, w8, w9, w10, w11
	b		0b
3:	adr_l		tt, \ltab
	\round		w4, w5, w6, w7, w8, w9, w10, w11, \bsz, b

CPU_BE(	rev		w4, w4		)
CPU_BE(	rev		w5, w5		)
CPU_BE(	rev		w6, w6		)
CPU_BE(	rev		w7, w7		)

	stp		w4, w5, [out]
	stp		w6, w7, [out, #8]
	ret
	.endm

ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm64_encrypt)

	.align		5
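	/*
	 * A note on the lookup tables: each 32-bit crypto_ft_tab entry for
	 * input byte x holds sbox[x] at byte offsets 1 and 2 (the other two
	 * bytes being 0x02*sbox[x] and 0x03*sbox[x]), so the plain S-box
	 * value needed by the final encryption round can be loaded byte
	 * wise from crypto_ft_tab + 1, in either endianness, which is what
	 * __aes_arm64_encrypt above relies on. The crypto_it_tab entries
	 * contain only the InvMixColumns multiples (0x0e, 0x09, 0x0d and
	 * 0x0b times the inverse S-box value), never the inverse S-box
	 * value itself, so the final decryption round needs the dedicated,
	 * cache line aligned byte table defined below.
	 */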
ENTRY(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
ENDPROC(__aes_arm64_decrypt)

	.section	".rodata", "a"
	.align		L1_CACHE_SHIFT
	.type		__aes_arm64_inverse_sbox, %object
__aes_arm64_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
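
/*
 * For reference, the C-side declarations in the accompanying glue code
 * (arch/arm64/crypto/aes-cipher-glue.c) look roughly like this, with
 * rounds = 6 + key_length / 4, i.e. 10, 12 or 14 for AES-128/192/256:
 *
 *	asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out,
 *					    const u8 *in, int rounds);
 *	asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out,
 *					    const u8 *in, int rounds);
 */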