/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/* emit the shared mode routines (aes-modes.S, included below) with a 'neon_' prefix */
#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	/*
	 * Fixed SIMD register allocation used by all macros in this file:
	 *   v7      - XTS tweak mask (aliased as 'xtsmask')
	 *   v8-v11  - scratch
	 *   v12     - 0x1b replicated in every byte (GF(2^8) reduction term)
	 *   v13     - [inverse] ShiftRows byte permutation, used with tbl
	 *   v14     - .Lror32by8 byte permutation, used with tbl
	 *   v15     - current round key, or #0x40 while inside sub_bytes*
	 *   v16-v31 - the full 256-byte Sbox, preloaded by 'prepare'
	 */
	xtsmask		.req	v7

	/* reload the XTS tweak mask into \xtsmask (v7) */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7		/* temp = msb set ? 0xff : 0x00 */
	shl		\out, \in, #1
	and		\temp, \temp, \const	/* reduce by 0x1b where a bit shifted out */
	eor		\out, \out, \temp
	.endm

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6		/* both bits that shift out of the byte */
	shl		\out, \in, #2
	pmul		\temp, \temp, \const	/* carryless multiply folds the reduction */
	eor		\out, \out, \temp
	.endm

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b		/* GF(2^8) reduction constant */
	ldr_l		q13, \shiftrows, \temp	/* ldr_l/adr_l are asm/assembler.h helpers */
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing - the preloaded state (Sbox/constants) is key independent */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	/*
	 * A single tbl/tbx can index at most 4 table registers (64 bytes),
	 * so the 256-entry Sbox is looked up in four chunks; the index is
	 * rebased by 64 before each chunk (v15 holds #0x40 here, set by
	 * the do_block* caller). tbx leaves lanes with out-of-range
	 * indices untouched, merging the partial results.
	 */
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h	/* each 32-bit column rotated by 16 */
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b /* rotate each column by 8 */
	eor		\in\().16b, \in\().16b, v8.16b
	.endm

	/*
	 * Run \rounds full AES rounds on the state in \in:
	 *   \enc    - 1 = encrypt, 0 = decrypt (selects MixColumns variant)
	 *   \rk     - pointer to the round key schedule
	 *   \rkp/\i - scratch GPRs (round key pointer / round counter)
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b /* ^round key */
	movi		v15.16b, #0x40		/* chunk rebase value for sub_bytes */
	tbl		\in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16	/* next round key */
	beq		2222f			/* final round omits MixColumns */
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b /* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to 2 or 4 AES states in parallel.
	 */

	/* SubBytes on two states; instructions interleaved to hide tbl/tbx latency */
	.macro		sub_bytes_2x, in0, in1
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, v8.16b, v15.16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	sub		v11.16b, v9.16b, v15.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm

	/*
	 * SubBytes on four states. Only v8-v11 are free as scratch, so the
	 * rebased indices for each chunk are recomputed in place (each 'sub'
	 * rebases an index register just after its previous value was
	 * consumed by a tbx).
	 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* mul_by_x on two states in parallel (see single-state version above) */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/* mul_by_x2 on two states in parallel (see single-state version above) */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/* MixColumns on two states in parallel */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h	/* column rotate by 16 */
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b /* column rotate by 8 */
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	/* as do_block, but for two interleaved states */
	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b /* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b /* ^round key */
	movi		v15.16b, #0x40		/* chunk rebase value for sub_bytes_2x */
	tbl		\in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
	sub_bytes_2x	\in0, \in1
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16	/* next round key */
	beq		2222f			/* final round omits MixColumns */
	mix_columns_2x	\in0, \in1, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b /* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b /* ^round key */
	.endm

	/* as do_block, but for four interleaved states */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b /* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b /* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b /* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b /* ^round key */
	movi		v15.16b, #0x40		/* chunk rebase value for sub_bytes_4x */
	tbl		\in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16	/* next round key */
	beq		2222f			/* final round omits MixColumns */
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b /* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b /* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b /* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b /* ^round key */
	.endm

	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	.section	".rodata", "a"
	.align		6	/* 64-byte alignment for the ld1 preload */
/*
 * AES forward S-box (SubBytes lookup table), 256 bytes.
 * Preloaded into v16-v31 by the 'prepare' macro for encryption.
 */
.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

/*
 * AES inverse S-box (InvSubBytes lookup table), 256 bytes.
 * Preloaded into v16-v31 by the 'prepare' macro for decryption.
 */
.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

/*
 * ShiftRows / InvShiftRows expressed as byte permutations of the
 * column-major 16-byte AES state, applied via tbl (loaded into v13).
 */
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

/*
 * Byte permutation rotating each 32-bit word right by 8 bits,
 * applied via tbl (loaded into v14; used by mix_columns*).
 */
.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201