/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)

	/*
	 * Multiply by polynomial 'x' in GF(2^8): shift left by one and,
	 * if the top bit was set, xor in the reduction polynomial.
	 * \const must hold #0x1b in each byte lane (see prepare below);
	 * sshr #7 replicates each byte's sign bit to build the mask.
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	add		\out, \in, \in
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm

	/*
	 * Preload the entire 256-byte Sbox into v16-v31 and set up the
	 * fixed registers used throughout:
	 *   v12 = #0x40 per lane (offset between 64-byte tbl/tbx chunks)
	 *   v13 = ShiftRows permutation vector
	 *   v14 = #0x1b per lane (GF(2^8) reduction constant)
	 */
	.macro		prepare, sbox, shiftrows, temp
	adr		\temp, \sbox
	movi		v12.16b, #0x40
	ldr		q13, \shiftrows
	movi		v14.16b, #0x1b
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm

	/*
	 * Apply the SubBytes transformation using the preloaded Sbox.
	 * tbl/tbx can only index 64 bytes of table at a time, so look up
	 * each 64-byte quarter in turn: indices for later quarters are
	 * rebased by subtracting #0x40 (v12); out-of-range indices leave
	 * the tbx destination untouched, so exactly one lookup hits.
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v12.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v12.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v12.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	/* apply MixColumns transformation */
	.macro		mix_columns, in
	mul_by_x	v10.16b, \in\().16b, v9.16b, v14.16b
	rev32		v8.8h, \in\().8h
	eor		\in\().16b, v10.16b, \in\().16b
	shl		v9.4s, v8.4s, #24
	shl		v11.4s, \in\().4s, #24
	sri		v9.4s, v8.4s, #8
	sri		v11.4s, \in\().4s, #8
	eor		v9.16b, v9.16b, v8.16b
	eor		v10.16b, v10.16b, v9.16b
	eor		\in\().16b, v10.16b, v11.16b
	.endm

	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	.macro		inv_mix_columns, in
	mul_by_x	v11.16b, \in\().16b, v10.16b, v14.16b
	mul_by_x	v11.16b, v11.16b, v10.16b, v14.16b
	eor		\in\().16b, \in\().16b, v11.16b
	rev32		v11.8h, v11.8h
	eor		\in\().16b, \in\().16b, v11.16b
	mix_columns	\in
	.endm

	/*
	 * Encrypt (\enc == 1) or decrypt one block in place.
	 * \rk points at the round key array, \rkp and \i are scratch.
	 * v15 holds the current round key; the final round skips the
	 * (inverse) MixColumns step, as required by the AES spec.
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.16b}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	ld1		{v15.16b}, [\rkp], #16
	subs		\i, \i, #1
	beq		2222f
	.if		\enc == 1
	mix_columns	\in
	.else
	inv_mix_columns	\in
	.endif
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to 2 or 4 AES states in parallel.
	 */

	.macro		sub_bytes_2x, in0, in1
	sub		v8.16b, \in0\().16b, v12.16b
	sub		v9.16b, \in1\().16b, v12.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, v8.16b, v12.16b
	sub		v11.16b, v9.16b, v12.16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v10.16b, v12.16b
	sub		v9.16b, v11.16b, v12.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v12.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v12.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v12.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v12.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v12.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v12.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v12.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v12.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v12.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v12.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v12.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v12.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	add		\out0\().16b, \in0\().16b, \in0\().16b
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	add		\out1\().16b, \in1\().16b, \in1\().16b
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	/* NOTE: clobbers v8-v13, including the fixed v12/v13 set by prepare */
	.macro		mix_columns_2x, in0, in1
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		\in0\().16b, v8.16b, \in0\().16b
	eor		\in1\().16b, v9.16b, \in1\().16b
	shl		v12.4s, v10.4s, #24
	shl		v13.4s, v11.4s, #24
	eor		v8.16b, v8.16b, v10.16b
	sri		v12.4s, v10.4s, #8
	shl		v10.4s, \in0\().4s, #24
	eor		v9.16b, v9.16b, v11.16b
	sri		v13.4s, v11.4s, #8
	shl		v11.4s, \in1\().4s, #24
	sri		v10.4s, \in0\().4s, #8
	eor		\in0\().16b, v8.16b, v12.16b
	sri		v11.4s, \in1\().4s, #8
	eor		\in1\().16b, v9.16b, v13.16b
	eor		\in0\().16b, v10.16b, \in0\().16b
	eor		\in1\().16b, v11.16b, \in1\().16b
	.endm

	.macro		inv_mix_cols_2x, in0, in1
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	mul_by_x_2x	v8, v9, v8, v9, v10, v11, v14
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v8.8h, v8.8h
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	mix_columns_2x	\in0, \in1
	.endm

	.macro		inv_mix_cols_4x, in0, in1, in2, in3
	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v14
	mul_by_x_2x	v10, v11, \in2, \in3, v12, v13, v14
	mul_by_x_2x	v8, v9, v8, v9, v12, v13, v14
	mul_by_x_2x	v10, v11, v10, v11, v12, v13, v14
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	eor		\in2\().16b, \in2\().16b, v10.16b
	eor		\in3\().16b, \in3\().16b, v11.16b
	rev32		v8.8h, v8.8h
	rev32		v9.8h, v9.8h
	rev32		v10.8h, v10.8h
	rev32		v11.8h, v11.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	eor		\in2\().16b, \in2\().16b, v10.16b
	eor		\in3\().16b, \in3\().16b, v11.16b
	mix_columns_2x	\in0, \in1
	mix_columns_2x	\in2, \in3
	.endm

	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1		{v15.16b}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	sub_bytes_2x	\in0, \in1
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	ld1		{v15.16b}, [\rkp], #16
	subs		\i, \i, #1
	beq		2222f
	.if		\enc == 1
	mix_columns_2x	\in0, \in1
	ldr		q13, .LForward_ShiftRows	/* reload: clobbered above */
	.else
	inv_mix_cols_2x	\in0, \in1
	ldr		q13, .LReverse_ShiftRows	/* reload: clobbered above */
	.endif
	movi		v12.16b, #0x40			/* reload: clobbered above */
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.16b}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	ld1		{v15.16b}, [\rkp], #16
	subs		\i, \i, #1
	beq		2222f
	.if		\enc == 1
	mix_columns_2x	\in0, \in1
	mix_columns_2x	\in2, \in3
	ldr		q13, .LForward_ShiftRows	/* reload: clobbered above */
	.else
	inv_mix_cols_4x	\in0, \in1, \in2, \in3
	ldr		q13, .LReverse_ShiftRows	/* reload: clobbered above */
	.endif
	movi		v12.16b, #0x40			/* reload: clobbered above */
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm

	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

#include "aes-modes.S"

	.text
	.align		4
.LForward_ShiftRows:
	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb

.LReverse_ShiftRows:
	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3

.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d