1/* Do not modify. This file is auto-generated from aes-gcm-armv8-unroll8_64.pl. */ 2#include "arm_arch.h" 3 4#if __ARM_MAX_ARCH__>=8 5.arch armv8-a+crypto 6.text 7.globl unroll8_eor3_aes_gcm_enc_128_kernel 8.type unroll8_eor3_aes_gcm_enc_128_kernel,%function 9.align 4 10unroll8_eor3_aes_gcm_enc_128_kernel: 11 AARCH64_VALID_CALL_TARGET 12 cbz x1, .L128_enc_ret 13 stp d8, d9, [sp, #-80]! 14 lsr x9, x1, #3 15 mov x16, x4 16 mov x8, x5 17 stp d10, d11, [sp, #16] 18 stp d12, d13, [sp, #32] 19 stp d14, d15, [sp, #48] 20 mov x5, #0xc200000000000000 21 stp x5, xzr, [sp, #64] 22 add x10, sp, #64 23 24 mov x15, #0x100000000 //set up counter increment 25 movi v31.16b, #0x0 26 mov v31.d[1], x15 27 mov x5, x9 28 ld1 { v0.16b}, [x16] //CTR block 0 29 30 sub x5, x5, #1 //byte_len - 1 31 32 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 33 34 rev32 v30.16b, v0.16b //set up reversed counter 35 36 add v30.4s, v30.4s, v31.4s //CTR block 0 37 38 rev32 v1.16b, v30.16b //CTR block 1 39 add v30.4s, v30.4s, v31.4s //CTR block 1 40 41 rev32 v2.16b, v30.16b //CTR block 2 42 add v30.4s, v30.4s, v31.4s //CTR block 2 43 44 rev32 v3.16b, v30.16b //CTR block 3 45 add v30.4s, v30.4s, v31.4s //CTR block 3 46 47 rev32 v4.16b, v30.16b //CTR block 4 48 add v30.4s, v30.4s, v31.4s //CTR block 4 49 50 rev32 v5.16b, v30.16b //CTR block 5 51 add v30.4s, v30.4s, v31.4s //CTR block 5 52 ldp q26, q27, [x8, #0] //load rk0, rk1 53 54 rev32 v6.16b, v30.16b //CTR block 6 55 add v30.4s, v30.4s, v31.4s //CTR block 6 56 57 rev32 v7.16b, v30.16b //CTR block 7 58 add v30.4s, v30.4s, v31.4s //CTR block 7 59 60 aese v4.16b, v26.16b 61 aesmc v4.16b, v4.16b //AES block 4 - round 0 62 aese v6.16b, v26.16b 63 aesmc v6.16b, v6.16b //AES block 6 - round 0 64 aese v3.16b, v26.16b 65 aesmc v3.16b, v3.16b //AES block 3 - round 0 66 67 aese v0.16b, v26.16b 68 aesmc v0.16b, v0.16b //AES block 0 - round 0 69 aese v1.16b, v26.16b 70 aesmc v1.16b, v1.16b 
//AES block 1 - round 0 71 aese v2.16b, v26.16b 72 aesmc v2.16b, v2.16b //AES block 2 - round 0 73 74 aese v7.16b, v26.16b 75 aesmc v7.16b, v7.16b //AES block 7 - round 0 76 aese v5.16b, v26.16b 77 aesmc v5.16b, v5.16b //AES block 5 - round 0 78 ldp q28, q26, [x8, #32] //load rk2, rk3 79 80 aese v3.16b, v27.16b 81 aesmc v3.16b, v3.16b //AES block 3 - round 1 82 83 aese v7.16b, v27.16b 84 aesmc v7.16b, v7.16b //AES block 7 - round 1 85 aese v5.16b, v27.16b 86 aesmc v5.16b, v5.16b //AES block 5 - round 1 87 aese v4.16b, v27.16b 88 aesmc v4.16b, v4.16b //AES block 4 - round 1 89 90 aese v2.16b, v27.16b 91 aesmc v2.16b, v2.16b //AES block 2 - round 1 92 aese v6.16b, v27.16b 93 aesmc v6.16b, v6.16b //AES block 6 - round 1 94 aese v0.16b, v27.16b 95 aesmc v0.16b, v0.16b //AES block 0 - round 1 96 97 aese v5.16b, v28.16b 98 aesmc v5.16b, v5.16b //AES block 5 - round 2 99 aese v1.16b, v27.16b 100 aesmc v1.16b, v1.16b //AES block 1 - round 1 101 aese v0.16b, v28.16b 102 aesmc v0.16b, v0.16b //AES block 0 - round 2 103 104 aese v2.16b, v28.16b 105 aesmc v2.16b, v2.16b //AES block 2 - round 2 106 aese v3.16b, v28.16b 107 aesmc v3.16b, v3.16b //AES block 3 - round 2 108 aese v7.16b, v28.16b 109 aesmc v7.16b, v7.16b //AES block 7 - round 2 110 111 aese v1.16b, v28.16b 112 aesmc v1.16b, v1.16b //AES block 1 - round 2 113 aese v6.16b, v28.16b 114 aesmc v6.16b, v6.16b //AES block 6 - round 2 115 aese v4.16b, v28.16b 116 aesmc v4.16b, v4.16b //AES block 4 - round 2 117 118 aese v2.16b, v26.16b 119 aesmc v2.16b, v2.16b //AES block 2 - round 3 120 121 ldp q27, q28, [x8, #64] //load rk4, rk5 122 aese v5.16b, v26.16b 123 aesmc v5.16b, v5.16b //AES block 5 - round 3 124 aese v0.16b, v26.16b 125 aesmc v0.16b, v0.16b //AES block 0 - round 3 126 127 aese v4.16b, v26.16b 128 aesmc v4.16b, v4.16b //AES block 4 - round 3 129 aese v3.16b, v26.16b 130 aesmc v3.16b, v3.16b //AES block 3 - round 3 131 aese v6.16b, v26.16b 132 aesmc v6.16b, v6.16b //AES block 6 - round 3 133 134 aese v7.16b, 
v26.16b 135 aesmc v7.16b, v7.16b //AES block 7 - round 3 136 137 aese v6.16b, v27.16b 138 aesmc v6.16b, v6.16b //AES block 6 - round 4 139 aese v1.16b, v26.16b 140 aesmc v1.16b, v1.16b //AES block 1 - round 3 141 aese v5.16b, v27.16b 142 aesmc v5.16b, v5.16b //AES block 5 - round 4 143 144 aese v7.16b, v27.16b 145 aesmc v7.16b, v7.16b //AES block 7 - round 4 146 aese v4.16b, v27.16b 147 aesmc v4.16b, v4.16b //AES block 4 - round 4 148 aese v0.16b, v27.16b 149 aesmc v0.16b, v0.16b //AES block 0 - round 4 150 151 aese v1.16b, v27.16b 152 aesmc v1.16b, v1.16b //AES block 1 - round 4 153 aese v2.16b, v27.16b 154 aesmc v2.16b, v2.16b //AES block 2 - round 4 155 aese v3.16b, v27.16b 156 aesmc v3.16b, v3.16b //AES block 3 - round 4 157 158 aese v7.16b, v28.16b 159 aesmc v7.16b, v7.16b //AES block 7 - round 5 160 aese v0.16b, v28.16b 161 aesmc v0.16b, v0.16b //AES block 0 - round 5 162 ldp q26, q27, [x8, #96] //load rk6, rk7 163 164 aese v1.16b, v28.16b 165 aesmc v1.16b, v1.16b //AES block 1 - round 5 166 aese v3.16b, v28.16b 167 aesmc v3.16b, v3.16b //AES block 3 - round 5 168 aese v2.16b, v28.16b 169 aesmc v2.16b, v2.16b //AES block 2 - round 5 170 171 aese v4.16b, v28.16b 172 aesmc v4.16b, v4.16b //AES block 4 - round 5 173 aese v5.16b, v28.16b 174 aesmc v5.16b, v5.16b //AES block 5 - round 5 175 aese v6.16b, v28.16b 176 aesmc v6.16b, v6.16b //AES block 6 - round 5 177 178 aese v4.16b, v26.16b 179 aesmc v4.16b, v4.16b //AES block 4 - round 6 180 aese v3.16b, v26.16b 181 aesmc v3.16b, v3.16b //AES block 3 - round 6 182 aese v2.16b, v26.16b 183 aesmc v2.16b, v2.16b //AES block 2 - round 6 184 185 aese v7.16b, v26.16b 186 aesmc v7.16b, v7.16b //AES block 7 - round 6 187 aese v6.16b, v26.16b 188 aesmc v6.16b, v6.16b //AES block 6 - round 6 189 aese v5.16b, v26.16b 190 aesmc v5.16b, v5.16b //AES block 5 - round 6 191 192 aese v0.16b, v26.16b 193 aesmc v0.16b, v0.16b //AES block 0 - round 6 194 aese v1.16b, v26.16b 195 aesmc v1.16b, v1.16b //AES block 1 - round 6 196 ldp q28, 
q26, [x8, #128] //load rk8, rk9 197 198 aese v5.16b, v27.16b 199 aesmc v5.16b, v5.16b //AES block 5 - round 7 200 201 ld1 { v19.16b}, [x3] 202 ext v19.16b, v19.16b, v19.16b, #8 203 rev64 v19.16b, v19.16b 204 205 aese v7.16b, v27.16b 206 aesmc v7.16b, v7.16b //AES block 7 - round 7 207 208 aese v4.16b, v27.16b 209 aesmc v4.16b, v4.16b //AES block 4 - round 7 210 aese v3.16b, v27.16b 211 aesmc v3.16b, v3.16b //AES block 3 - round 7 212 aese v6.16b, v27.16b 213 aesmc v6.16b, v6.16b //AES block 6 - round 7 214 215 aese v1.16b, v27.16b 216 aesmc v1.16b, v1.16b //AES block 1 - round 7 217 aese v2.16b, v27.16b 218 aesmc v2.16b, v2.16b //AES block 2 - round 7 219 aese v0.16b, v27.16b 220 aesmc v0.16b, v0.16b //AES block 0 - round 7 221 222 aese v3.16b, v28.16b 223 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 224 aese v6.16b, v28.16b 225 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 226 aese v2.16b, v28.16b 227 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 228 229 aese v7.16b, v28.16b 230 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 231 aese v0.16b, v28.16b 232 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 233 ldr q27, [x8, #160] //load rk10 234 235 aese v3.16b, v26.16b //AES block 8k+11 - round 9 236 aese v4.16b, v28.16b 237 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 238 aese v2.16b, v26.16b //AES block 8k+10 - round 9 239 240 aese v5.16b, v28.16b 241 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 242 aese v1.16b, v28.16b 243 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 244 aese v6.16b, v26.16b //AES block 8k+14 - round 9 245 246 aese v4.16b, v26.16b //AES block 8k+12 - round 9 247 add x5, x5, x0 248 aese v0.16b, v26.16b //AES block 8k+8 - round 9 249 250 aese v7.16b, v26.16b //AES block 8k+15 - round 9 251 aese v5.16b, v26.16b //AES block 8k+13 - round 9 252 aese v1.16b, v26.16b //AES block 8k+9 - round 9 253 254 add x4, x0, x1, lsr #3 //end_input_ptr 255 cmp x0, x5 //check if we have <= 8 blocks 256 b.ge .L128_enc_tail //handle tail 257 258 ldp 
q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 259 260 ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 261 262 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 263 264 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 265 cmp x0, x5 //check if we have <= 8 blocks 266 267.inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result 268 rev32 v0.16b, v30.16b //CTR block 8 269 add v30.4s, v30.4s, v31.4s //CTR block 8 270 271.inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result 272 stp q8, q9, [x2], #32 //AES block 0, 1 - store result 273 274 rev32 v1.16b, v30.16b //CTR block 9 275.inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 276 add v30.4s, v30.4s, v31.4s //CTR block 9 277 278.inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result 279.inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 280.inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 281 282 rev32 v2.16b, v30.16b //CTR block 10 283 add v30.4s, v30.4s, v31.4s //CTR block 10 284 285.inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result 286.inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b,v27.16b //AES block 7 - result 287 stp q10, q11, [x2], #32 //AES block 2, 3 - store result 288 289 rev32 v3.16b, v30.16b //CTR block 11 290 add v30.4s, v30.4s, v31.4s //CTR block 11 291 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 292 293 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 294 295 rev32 v4.16b, v30.16b //CTR block 12 296 add v30.4s, v30.4s, v31.4s //CTR block 12 297 b.ge .L128_enc_prepretail //do prepretail 298 299.L128_enc_main_loop: //main loop start 300 rev32 v5.16b, v30.16b //CTR block 8k+13 301 ldr q20, [x3, #128] //load h5l | h5h 302 ext v20.16b, v20.16b, v20.16b, #8 303 ldr q22, [x3, #160] //load h6l | h6h 304 ext v22.16b, v22.16b, v22.16b, #8 305 add v30.4s, v30.4s, v31.4s //CTR 
block 8k+13 306 307 rev64 v9.16b, v9.16b //GHASH block 8k+1 308 rev64 v8.16b, v8.16b //GHASH block 8k 309 ldr q23, [x3, #176] //load h7l | h7h 310 ext v23.16b, v23.16b, v23.16b, #8 311 ldr q25, [x3, #208] //load h8l | h8h 312 ext v25.16b, v25.16b, v25.16b, #8 313 314 rev32 v6.16b, v30.16b //CTR block 8k+14 315 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 316 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 317 318 ldr q21, [x3, #144] //load h6k | h5k 319 ldr q24, [x3, #192] //load h8k | h7k 320 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 321 rev64 v11.16b, v11.16b //GHASH block 8k+3 322 323 ldp q26, q27, [x8, #0] //load rk0, rk1 324 eor v8.16b, v8.16b, v19.16b //PRE 1 325 rev32 v7.16b, v30.16b //CTR block 8k+15 326 327 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 328 329 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 330 rev64 v10.16b, v10.16b //GHASH block 8k+2 331 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 332 333 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 334 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 335 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 336 337 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 338 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 339 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 340 341 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 342 ldr q23, [x3, #80] //load h3l | h3h 343 ext v23.16b, v23.16b, v23.16b, #8 344 ldr q25, [x3, #112] //load h3l | h3h 345 ext v25.16b, v25.16b, v25.16b, #8 346 aese v5.16b, v26.16b 347 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 348 349 aese v1.16b, v26.16b 350 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 351 aese v4.16b, v26.16b 352 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 353 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 354 355 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 356 aese v2.16b, v26.16b 357 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 358 eor v8.16b, 
v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 359 360 aese v6.16b, v26.16b 361 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 362 aese v1.16b, v27.16b 363 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 364 aese v0.16b, v26.16b 365 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 366 367 aese v2.16b, v27.16b 368 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 369 aese v3.16b, v26.16b 370 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 371 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 372 373 aese v5.16b, v27.16b 374 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 375 aese v7.16b, v26.16b 376 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 377 aese v0.16b, v27.16b 378 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 379 380.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b,v9.16b //GHASH block 8k+2, 8k+3 - high 381 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 382 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 383 384 ldp q28, q26, [x8, #32] //load rk2, rk3 385 aese v4.16b, v27.16b 386 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 387 aese v3.16b, v27.16b 388 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 389 390 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 391 aese v7.16b, v27.16b 392 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 393 aese v6.16b, v27.16b 394 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 395 396 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 397 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 398 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 399 400 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 401.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 402 403 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 404 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 405 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 406 407 aese v5.16b, v28.16b 408 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 409 aese 
v4.16b, v28.16b 410 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 411 aese v2.16b, v28.16b 412 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 413 414 aese v1.16b, v28.16b 415 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 416.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 417 aese v6.16b, v28.16b 418 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 419 420 aese v0.16b, v28.16b 421 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 422 aese v3.16b, v28.16b 423 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 424 aese v7.16b, v28.16b 425 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 426 427 aese v6.16b, v26.16b 428 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 429 ldr q21, [x3, #48] //load h2k | h1k 430 ldr q24, [x3, #96] //load h4k | h3k 431 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 432 433 ldp q27, q28, [x8, #64] //load rk4, rk5 434 aese v2.16b, v26.16b 435 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 436 aese v1.16b, v26.16b 437 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 438 439 ldr q20, [x3, #32] //load h1l | h1h 440 ext v20.16b, v20.16b, v20.16b, #8 441 ldr q22, [x3, #64] //load h1l | h1h 442 ext v22.16b, v22.16b, v22.16b, #8 443 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 444 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 445 446 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 447 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 448 449 aese v0.16b, v26.16b 450 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 451 aese v3.16b, v26.16b 452 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 453 454 aese v7.16b, v26.16b 455 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 456 aese v4.16b, v26.16b 457 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 458 459 aese v5.16b, v26.16b 460 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 461 aese v0.16b, v27.16b 462 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 463 464 aese v7.16b, v27.16b 465 aesmc 
v7.16b, v7.16b //AES block 8k+15 - round 4 466 aese v3.16b, v27.16b 467 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 468 aese v4.16b, v27.16b 469 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 470 471 aese v5.16b, v27.16b 472 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 473 aese v6.16b, v27.16b 474 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 475 aese v1.16b, v27.16b 476 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 477 478 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 479 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 480 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 481 482 aese v2.16b, v27.16b 483 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 484 ldp q26, q27, [x8, #96] //load rk6, rk7 485 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 486 487 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 488 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 489 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 490 491 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 492 aese v2.16b, v28.16b 493 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 494 aese v5.16b, v28.16b 495 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 496 497 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 498.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 499 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 500 501.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 502 aese v6.16b, v28.16b 503 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 504 505 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 506 aese v7.16b, v28.16b 507 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 508 aese v1.16b, v28.16b 509 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 510 511 aese v3.16b, v28.16b 512 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 513 aese v4.16b, v28.16b 514 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 515 aese 
v0.16b, v28.16b 516 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 517 518.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 519 ldr d16, [x10] //MODULO - load modulo constant 520 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 521 522 aese v7.16b, v26.16b 523 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 524 aese v5.16b, v26.16b 525 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 526 527 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 528 aese v1.16b, v26.16b 529 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 530 aese v2.16b, v26.16b 531 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 532 533 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 534.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 535 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 536 537 aese v3.16b, v26.16b 538 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 539 rev32 v20.16b, v30.16b //CTR block 8k+16 540 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 541 542 aese v4.16b, v26.16b 543 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 544 aese v0.16b, v26.16b 545 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 546 aese v6.16b, v26.16b 547 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 548 549.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 550 ldp q28, q26, [x8, #128] //load rk8, rk9 551.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 552 553 aese v2.16b, v27.16b 554 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 555 aese v7.16b, v27.16b 556 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 557 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 558 559 aese v5.16b, v27.16b 560 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 561 aese v6.16b, v27.16b 562 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 563 aese v1.16b, v27.16b 564 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 565 566 
pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 567 aese v0.16b, v27.16b 568 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 569 aese v4.16b, v27.16b 570 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 571 572 rev32 v22.16b, v30.16b //CTR block 8k+17 573 aese v3.16b, v27.16b 574 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 575 576 aese v5.16b, v28.16b 577 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 578 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext 579 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 580 581 aese v2.16b, v28.16b 582 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 583 aese v1.16b, v28.16b 584 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 585 aese v7.16b, v28.16b 586 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 587 588 aese v4.16b, v28.16b 589 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 590.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 591 ldr q27, [x8, #160] //load rk10 592 593 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 594 rev32 v23.16b, v30.16b //CTR block 8k+18 595 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 596 aese v3.16b, v28.16b 597 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 598 599 aese v0.16b, v28.16b 600 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 601.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 602 aese v6.16b, v28.16b 603 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 604 605 aese v2.16b, v26.16b //AES block 8k+10 - round 9 606 aese v4.16b, v26.16b //AES block 8k+12 - round 9 607 aese v1.16b, v26.16b //AES block 8k+9 - round 9 608 609 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext 610 rev32 v25.16b, v30.16b //CTR block 8k+19 611 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 612 613 cmp x0, x5 //.LOOP CONTROL 614.inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 615 aese v7.16b, v26.16b //AES block 8k+15 - round 9 616 617 aese 
v6.16b, v26.16b //AES block 8k+14 - round 9 618 aese v3.16b, v26.16b //AES block 8k+11 - round 9 619 620.inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result 621 622 mov v2.16b, v23.16b //CTR block 8k+18 623 aese v0.16b, v26.16b //AES block 8k+8 - round 9 624 625 rev32 v4.16b, v30.16b //CTR block 8k+20 626 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 627 628.inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result 629 aese v5.16b, v26.16b //AES block 8k+13 - round 9 630 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 631 632.inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result 633.inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result 634 mov v3.16b, v25.16b //CTR block 8k+19 635 636 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 637.inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 638 mov v1.16b, v22.16b //CTR block 8k+17 639 640.inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result 641 mov v0.16b, v20.16b //CTR block 8k+16 642 stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 643 644 stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 645.inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 646 647 stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result 648.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 649 650 stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result 651 b.lt .L128_enc_main_loop 652 653.L128_enc_prepretail: //PREPRETAIL 654 rev32 v5.16b, v30.16b //CTR block 8k+13 655 ldr q23, [x3, #176] //load h7l | h7h 656 ext v23.16b, v23.16b, v23.16b, #8 657 ldr q25, [x3, #208] //load h8l | h8h 658 ext v25.16b, v25.16b, v25.16b, #8 659 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 660 661 ldr q20, [x3, #128] //load h5l | h5h 662 ext v20.16b, v20.16b, v20.16b, #8 663 
ldr q22, [x3, #160] //load h6l | h6h 664 ext v22.16b, v22.16b, v22.16b, #8 665 rev64 v8.16b, v8.16b //GHASH block 8k 666 rev64 v9.16b, v9.16b //GHASH block 8k+1 667 668 ldr q21, [x3, #144] //load h6k | h5k 669 ldr q24, [x3, #192] //load h6k | h5k 670 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 671 rev64 v11.16b, v11.16b //GHASH block 8k+3 672 673 rev64 v10.16b, v10.16b //GHASH block 8k+2 674 eor v8.16b, v8.16b, v19.16b //PRE 1 675 676 rev32 v6.16b, v30.16b //CTR block 8k+14 677 678 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 679 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 680 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 681 682 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 683 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 684 685 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 686 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 687 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 688 689 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 690 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 691 692 ldp q26, q27, [x8, #0] //load rk0, rk1 693 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 694 695 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 696 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 697 698 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 699 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 700 701 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 702 703 rev32 v7.16b, v30.16b //CTR block 8k+15 704 705 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 706 707 aese v2.16b, v26.16b 708 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 709 710 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 711 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 712 713 aese v6.16b, v26.16b 714 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 715 aese v3.16b, v26.16b 716 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 717 
718 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 719 aese v1.16b, v26.16b 720 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 721 722.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 723 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 724 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 725 726 aese v5.16b, v26.16b 727 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 728 aese v7.16b, v26.16b 729 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 730 731 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 732 aese v4.16b, v26.16b 733 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 734 aese v0.16b, v26.16b 735 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 736 737 aese v3.16b, v27.16b 738 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 739 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 740 741 ldr q23, [x3, #80] //load h3l | h3h 742 ext v23.16b, v23.16b, v23.16b, #8 743 ldr q25, [x3, #112] //load h4l | h4h 744 ext v25.16b, v25.16b, v25.16b, #8 745 746 ldp q28, q26, [x8, #32] //load rk2, rk3 747 aese v5.16b, v27.16b 748 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 749 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 750 751.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 752 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 753 754 aese v1.16b, v27.16b 755 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 756 aese v0.16b, v27.16b 757 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 758 759.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 760 ldr q21, [x3, #48] //load h2k | h1k 761 ldr q24, [x3, #96] //load h4k | h3k 762 aese v2.16b, v27.16b 763 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 764 765 aese v4.16b, v27.16b 766 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 767 aese v7.16b, v27.16b 768 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 769 770 aese v5.16b, v28.16b 771 aesmc 
v5.16b, v5.16b //AES block 8k+13 - round 2 772 aese v2.16b, v28.16b 773 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 774 aese v3.16b, v28.16b 775 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 776 777 aese v1.16b, v28.16b 778 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 779 aese v6.16b, v27.16b 780 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 781 aese v4.16b, v28.16b 782 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 783 784 aese v5.16b, v26.16b 785 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 786 aese v0.16b, v28.16b 787 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 788 789 aese v6.16b, v28.16b 790 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 791 aese v7.16b, v28.16b 792 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 793 ldp q27, q28, [x8, #64] //load rk4, rk5 794 795 ldr q20, [x3, #32] //load h1l | h1h 796 ext v20.16b, v20.16b, v20.16b, #8 797 ldr q22, [x3, #64] //load h1l | h1h 798 ext v22.16b, v22.16b, v22.16b, #8 799 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 800 aese v0.16b, v26.16b 801 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 802 803 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 804 aese v6.16b, v26.16b 805 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 806 aese v3.16b, v26.16b 807 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 808 809 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 810 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 811 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 812 813 aese v2.16b, v26.16b 814 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 815 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 816 817 aese v7.16b, v26.16b 818 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 819 aese v1.16b, v26.16b 820 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 821 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 822 823 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 824 aese v4.16b, v26.16b 825 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 
826 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 827 828 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 829 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 830 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 831 832 aese v1.16b, v27.16b 833 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 834 aese v3.16b, v27.16b 835 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 836.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 837 838.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 839 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 840 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 841 842 aese v1.16b, v28.16b 843 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 844 aese v6.16b, v27.16b 845 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 846 aese v0.16b, v27.16b 847 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 848 849 aese v7.16b, v27.16b 850 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 851 aese v2.16b, v27.16b 852 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 853 854 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 855 aese v4.16b, v27.16b 856 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 857 aese v5.16b, v27.16b 858 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 859 860 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 861 ldp q26, q27, [x8, #96] //load rk6, rk7 862 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 863 864.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 865 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 866 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 867 868 aese v0.16b, v28.16b 869 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 870 aese v7.16b, v28.16b 871 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 872 ldr d16, [x10] //MODULO - load modulo constant 873 874 aese v2.16b, v28.16b 875 aesmc v2.16b, v2.16b //AES block 8k+10 - 
round 5 876 aese v4.16b, v28.16b 877 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 878 879.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 880 aese v5.16b, v28.16b 881 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 882 aese v6.16b, v28.16b 883 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 884 885 aese v3.16b, v28.16b 886 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 887 aese v4.16b, v26.16b 888 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 889 890 aese v5.16b, v26.16b 891 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 892 aese v2.16b, v26.16b 893 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 894 aese v0.16b, v26.16b 895 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 896 897 aese v3.16b, v26.16b 898 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 899.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 900.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 901 902 aese v6.16b, v26.16b 903 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 904 aese v1.16b, v26.16b 905 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 906 aese v7.16b, v26.16b 907 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 908 909 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 910.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 911 ldp q28, q26, [x8, #128] //load rk8, rk9 912 913 aese v3.16b, v27.16b 914 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 915 aese v6.16b, v27.16b 916 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 917 aese v1.16b, v27.16b 918 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 919 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 920 921 aese v5.16b, v27.16b 922 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 923 aese v0.16b, v27.16b 924 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 925.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into 
mid 926 927 aese v2.16b, v27.16b 928 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 929 aese v7.16b, v27.16b 930 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 931 932 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 933 aese v4.16b, v27.16b 934 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 935 936 aese v7.16b, v28.16b 937 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 938 aese v2.16b, v28.16b 939 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 940 aese v1.16b, v28.16b 941 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 942 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 943 944 aese v6.16b, v28.16b 945 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 946.inst 0xce114a73 //eor3 v19.16b, v19.16b, v17.16b, v18.16b //MODULO - fold into low 947 aese v4.16b, v28.16b 948 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 949 950 aese v3.16b, v28.16b 951 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 952 aese v0.16b, v28.16b 953 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 954 aese v5.16b, v28.16b 955 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 956 957 ldr q27, [x8, #160] //load rk10 958 aese v6.16b, v26.16b //AES block 8k+14 - round 9 959 aese v2.16b, v26.16b //AES block 8k+10 - round 9 960 961 aese v0.16b, v26.16b //AES block 8k+8 - round 9 962 aese v1.16b, v26.16b //AES block 8k+9 - round 9 963 964 aese v3.16b, v26.16b //AES block 8k+11 - round 9 965 aese v5.16b, v26.16b //AES block 8k+13 - round 9 966 967 aese v4.16b, v26.16b //AES block 8k+12 - round 9 968 aese v7.16b, v26.16b //AES block 8k+15 - round 9 969.L128_enc_tail: //TAIL 970 971 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 972 ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 973 974 mov v29.16b, v27.16b 975 ldp q20, q21, [x3, #128] //load h5l | h5h 976 ext v20.16b, v20.16b, v20.16b, #8 977 978.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 979 ext v16.16b, v19.16b, v19.16b, #8 //prepare final 
partial tag 980 ldp q22, q23, [x3, #160] //load h6l | h6h 981 ext v22.16b, v22.16b, v22.16b, #8 982 ext v23.16b, v23.16b, v23.16b, #8 983 984 ldp q24, q25, [x3, #192] //load h8k | h7k 985 ext v25.16b, v25.16b, v25.16b, #8 986 cmp x5, #112 987 b.gt .L128_enc_blocks_more_than_7 988 989 mov v7.16b, v6.16b 990 mov v6.16b, v5.16b 991 movi v17.8b, #0 992 993 cmp x5, #96 994 sub v30.4s, v30.4s, v31.4s 995 mov v5.16b, v4.16b 996 997 mov v4.16b, v3.16b 998 mov v3.16b, v2.16b 999 mov v2.16b, v1.16b 1000 1001 movi v19.8b, #0 1002 movi v18.8b, #0 1003 b.gt .L128_enc_blocks_more_than_6 1004 1005 mov v7.16b, v6.16b 1006 cmp x5, #80 1007 1008 sub v30.4s, v30.4s, v31.4s 1009 mov v6.16b, v5.16b 1010 mov v5.16b, v4.16b 1011 1012 mov v4.16b, v3.16b 1013 mov v3.16b, v1.16b 1014 b.gt .L128_enc_blocks_more_than_5 1015 1016 cmp x5, #64 1017 sub v30.4s, v30.4s, v31.4s 1018 1019 mov v7.16b, v6.16b 1020 mov v6.16b, v5.16b 1021 1022 mov v5.16b, v4.16b 1023 mov v4.16b, v1.16b 1024 b.gt .L128_enc_blocks_more_than_4 1025 1026 mov v7.16b, v6.16b 1027 sub v30.4s, v30.4s, v31.4s 1028 mov v6.16b, v5.16b 1029 1030 mov v5.16b, v1.16b 1031 cmp x5, #48 1032 b.gt .L128_enc_blocks_more_than_3 1033 1034 sub v30.4s, v30.4s, v31.4s 1035 mov v7.16b, v6.16b 1036 mov v6.16b, v1.16b 1037 1038 cmp x5, #32 1039 ldr q24, [x3, #96] //load h4k | h3k 1040 b.gt .L128_enc_blocks_more_than_2 1041 1042 cmp x5, #16 1043 1044 sub v30.4s, v30.4s, v31.4s 1045 mov v7.16b, v1.16b 1046 b.gt .L128_enc_blocks_more_than_1 1047 1048 ldr q21, [x3, #48] //load h2k | h1k 1049 sub v30.4s, v30.4s, v31.4s 1050 b .L128_enc_blocks_less_than_1 1051.L128_enc_blocks_more_than_7: //blocks left > 7 1052 st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 1053 1054 rev64 v8.16b, v9.16b //GHASH final-7 block 1055 ldr q9, [x0], #16 //AES final-6 block - load plaintext 1056 1057 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1058 1059 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 1060 1061 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 
block - high 1062 1063 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 1064 1065 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 1066 movi v16.8b, #0 //suppress further partial tag feed in 1067 1068.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 1069 1070 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 1071 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 1072.L128_enc_blocks_more_than_6: //blocks left > 6 1073 1074 st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 1075 1076 rev64 v8.16b, v9.16b //GHASH final-6 block 1077 ldr q9, [x0], #16 //AES final-5 block - load plaintext 1078 1079 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1080 1081 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 1082 1083.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 1084 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 1085 1086 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 1087 movi v16.8b, #0 //suppress further partial tag feed in 1088 1089 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 1090 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 1091 1092 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 1093 1094 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 1095 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 1096.L128_enc_blocks_more_than_5: //blocks left > 5 1097 1098 st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 1099 1100 rev64 v8.16b, v9.16b //GHASH final-5 block 1101 1102 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1103 1104 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 1105 ldr q9, [x0], #16 //AES final-4 block - load plaintext 1106 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 1107 1108 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 1109 1110 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 1111 1112 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 
1113 1114.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 1115 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 1116 movi v16.8b, #0 //suppress further partial tag feed in 1117 1118 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 1119 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 1120 1121 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 1122.L128_enc_blocks_more_than_4: //blocks left > 4 1123 1124 st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 1125 1126 rev64 v8.16b, v9.16b //GHASH final-4 block 1127 1128 ldr q9, [x0], #16 //AES final-3 block - load plaintext 1129 1130 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1131 1132 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 1133 movi v16.8b, #0 //suppress further partial tag feed in 1134 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 1135 1136 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 1137 1138 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 1139 1140 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 1141 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 1142 1143 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 1144 1145.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 1146 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 1147.L128_enc_blocks_more_than_3: //blocks left > 3 1148 1149 st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 1150 1151 ldr q25, [x3, #112] //load h4l | h4h 1152 ext v25.16b, v25.16b, v25.16b, #8 1153 1154 rev64 v8.16b, v9.16b //GHASH final-3 block 1155 1156 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1157 movi v16.8b, #0 //suppress further partial tag feed in 1158 1159 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 1160 ldr q24, [x3, #96] //load h4k | h3k 1161 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 1162 1163 ldr q9, [x0], #16 //AES final-2 block - load plaintext 1164 
1165 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 1166 1167 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 1168 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 1169 1170.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 1171 1172 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 1173 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 1174 1175 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 1176 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 1177.L128_enc_blocks_more_than_2: //blocks left > 2 1178 1179 st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 1180 1181 rev64 v8.16b, v9.16b //GHASH final-2 block 1182 1183 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1184 1185 ldr q9, [x0], #16 //AES final-1 block - load plaintext 1186 1187 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 1188 ldr q23, [x3, #80] //load h3l | h3h 1189 ext v23.16b, v23.16b, v23.16b, #8 1190 movi v16.8b, #0 //suppress further partial tag feed in 1191 1192 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 1193.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 1194 1195 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 1196 1197 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 1198 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 1199 1200 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 1201 1202 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 1203 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 1204.L128_enc_blocks_more_than_1: //blocks left > 1 1205 1206 st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 1207 1208 ldr q22, [x3, #64] //load h2l | h2h 1209 ext v22.16b, v22.16b, v22.16b, #8 1210 rev64 v8.16b, v9.16b //GHASH final-1 block 1211 ldr q9, [x0], #16 //AES final block - load plaintext 1212 1213 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1214 1215 movi v16.8b, #0 //suppress 
further partial tag feed in 1216 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 1217.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 1218 1219 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 1220 1221 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 1222 1223 ldr q21, [x3, #48] //load h2k | h1k 1224 1225 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 1226 1227 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 1228 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 1229 1230 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 1231 1232 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 1233 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 1234.L128_enc_blocks_less_than_1: //blocks left <= 1 1235 1236 rev32 v30.16b, v30.16b 1237 str q30, [x16] //store the updated counter 1238 and x1, x1, #127 //bit_length %= 128 1239 1240 sub x1, x1, #128 //bit_length -= 128 1241 1242 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 1243 1244 mvn x6, xzr //temp0_x = 0xffffffffffffffff 1245 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 1246 and x1, x1, #127 //bit_length %= 128 1247 1248 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 1249 mvn x7, xzr //temp1_x = 0xffffffffffffffff 1250 cmp x1, #64 1251 1252 csel x13, x7, x6, lt 1253 csel x14, x6, xzr, lt 1254 1255 mov v0.d[1], x14 1256 mov v0.d[0], x13 //ctr0b is mask for last block 1257 1258 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 1259 1260 rev64 v8.16b, v9.16b //GHASH final block 1261 1262 bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 1263 st1 { v9.16b}, [x2] //store all 16B 1264 1265 eor v8.16b, v8.16b, v16.16b //feed in partial tag 1266 1267 ins v16.d[0], v8.d[1] //GHASH final block - mid 1268 1269 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 1270 ldr q20, [x3, #32] 
//load h1l | h1h 1271 ext v20.16b, v20.16b, v20.16b, #8 1272 1273 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 1274 1275 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 1276 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 1277 ldr d16, [x10] //MODULO - load modulo constant 1278 1279 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 1280 1281 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 1282 1283 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 1284 1285 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 1286 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 1287 1288.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 1289 1290.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 1291 1292 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 1293 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 1294 1295.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 1296 ext v19.16b, v19.16b, v19.16b, #8 1297 rev64 v19.16b, v19.16b 1298 st1 { v19.16b }, [x3] 1299 mov x0, x9 1300 1301 ldp d10, d11, [sp, #16] 1302 ldp d12, d13, [sp, #32] 1303 ldp d14, d15, [sp, #48] 1304 ldp d8, d9, [sp], #80 1305 ret 1306 1307.L128_enc_ret: 1308 mov w0, #0x0 1309 ret 1310.size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel 1311.globl unroll8_eor3_aes_gcm_dec_128_kernel 1312.type unroll8_eor3_aes_gcm_dec_128_kernel,%function 1313.align 4 1314unroll8_eor3_aes_gcm_dec_128_kernel: 1315 AARCH64_VALID_CALL_TARGET 1316 cbz x1, .L128_dec_ret 1317 stp d8, d9, [sp, #-80]! 
1318 lsr x9, x1, #3 1319 mov x16, x4 1320 mov x8, x5 1321 stp d10, d11, [sp, #16] 1322 stp d12, d13, [sp, #32] 1323 stp d14, d15, [sp, #48] 1324 mov x5, #0xc200000000000000 1325 stp x5, xzr, [sp, #64] 1326 add x10, sp, #64 1327 1328 mov x5, x9 1329 ld1 { v0.16b}, [x16] //CTR block 0 1330 1331 ldp q26, q27, [x8, #0] //load rk0, rk1 1332 sub x5, x5, #1 //byte_len - 1 1333 1334 mov x15, #0x100000000 //set up counter increment 1335 movi v31.16b, #0x0 1336 mov v31.d[1], x15 1337 ld1 { v19.16b}, [x3] 1338 ext v19.16b, v19.16b, v19.16b, #8 1339 rev64 v19.16b, v19.16b 1340 1341 rev32 v30.16b, v0.16b //set up reversed counter 1342 1343 aese v0.16b, v26.16b 1344 aesmc v0.16b, v0.16b //AES block 0 - round 0 1345 1346 add v30.4s, v30.4s, v31.4s //CTR block 0 1347 1348 rev32 v1.16b, v30.16b //CTR block 1 1349 add v30.4s, v30.4s, v31.4s //CTR block 1 1350 1351 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 1352 1353 rev32 v2.16b, v30.16b //CTR block 2 1354 add v30.4s, v30.4s, v31.4s //CTR block 2 1355 aese v1.16b, v26.16b 1356 aesmc v1.16b, v1.16b //AES block 1 - round 0 1357 1358 rev32 v3.16b, v30.16b //CTR block 3 1359 add v30.4s, v30.4s, v31.4s //CTR block 3 1360 1361 aese v0.16b, v27.16b 1362 aesmc v0.16b, v0.16b //AES block 0 - round 1 1363 aese v1.16b, v27.16b 1364 aesmc v1.16b, v1.16b //AES block 1 - round 1 1365 1366 rev32 v4.16b, v30.16b //CTR block 4 1367 add v30.4s, v30.4s, v31.4s //CTR block 4 1368 1369 rev32 v5.16b, v30.16b //CTR block 5 1370 add v30.4s, v30.4s, v31.4s //CTR block 5 1371 1372 aese v2.16b, v26.16b 1373 aesmc v2.16b, v2.16b //AES block 2 - round 0 1374 1375 rev32 v6.16b, v30.16b //CTR block 6 1376 add v30.4s, v30.4s, v31.4s //CTR block 6 1377 aese v5.16b, v26.16b 1378 aesmc v5.16b, v5.16b //AES block 5 - round 0 1379 1380 aese v3.16b, v26.16b 1381 aesmc v3.16b, v3.16b //AES block 3 - round 0 1382 aese v4.16b, v26.16b 1383 aesmc v4.16b, v4.16b //AES block 4 - round 0 1384 1385 
rev32 v7.16b, v30.16b //CTR block 7 1386 1387 aese v6.16b, v26.16b 1388 aesmc v6.16b, v6.16b //AES block 6 - round 0 1389 aese v2.16b, v27.16b 1390 aesmc v2.16b, v2.16b //AES block 2 - round 1 1391 1392 aese v7.16b, v26.16b 1393 aesmc v7.16b, v7.16b //AES block 7 - round 0 1394 1395 ldp q28, q26, [x8, #32] //load rk2, rk3 1396 1397 aese v6.16b, v27.16b 1398 aesmc v6.16b, v6.16b //AES block 6 - round 1 1399 aese v5.16b, v27.16b 1400 aesmc v5.16b, v5.16b //AES block 5 - round 1 1401 1402 aese v4.16b, v27.16b 1403 aesmc v4.16b, v4.16b //AES block 4 - round 1 1404 aese v7.16b, v27.16b 1405 aesmc v7.16b, v7.16b //AES block 7 - round 1 1406 1407 aese v7.16b, v28.16b 1408 aesmc v7.16b, v7.16b //AES block 7 - round 2 1409 aese v0.16b, v28.16b 1410 aesmc v0.16b, v0.16b //AES block 0 - round 2 1411 aese v3.16b, v27.16b 1412 aesmc v3.16b, v3.16b //AES block 3 - round 1 1413 1414 aese v6.16b, v28.16b 1415 aesmc v6.16b, v6.16b //AES block 6 - round 2 1416 aese v2.16b, v28.16b 1417 aesmc v2.16b, v2.16b //AES block 2 - round 2 1418 aese v5.16b, v28.16b 1419 aesmc v5.16b, v5.16b //AES block 5 - round 2 1420 1421 aese v4.16b, v28.16b 1422 aesmc v4.16b, v4.16b //AES block 4 - round 2 1423 aese v3.16b, v28.16b 1424 aesmc v3.16b, v3.16b //AES block 3 - round 2 1425 aese v1.16b, v28.16b 1426 aesmc v1.16b, v1.16b //AES block 1 - round 2 1427 1428 aese v6.16b, v26.16b 1429 aesmc v6.16b, v6.16b //AES block 6 - round 3 1430 aese v2.16b, v26.16b 1431 aesmc v2.16b, v2.16b //AES block 2 - round 3 1432 1433 ldp q27, q28, [x8, #64] //load rk4, rk5 1434 aese v5.16b, v26.16b 1435 aesmc v5.16b, v5.16b //AES block 5 - round 3 1436 1437 aese v0.16b, v26.16b 1438 aesmc v0.16b, v0.16b //AES block 0 - round 3 1439 aese v7.16b, v26.16b 1440 aesmc v7.16b, v7.16b //AES block 7 - round 3 1441 1442 aese v3.16b, v26.16b 1443 aesmc v3.16b, v3.16b //AES block 3 - round 3 1444 aese v1.16b, v26.16b 1445 aesmc v1.16b, v1.16b //AES block 1 - round 3 1446 1447 aese v0.16b, v27.16b 1448 aesmc v0.16b, v0.16b //AES 
block 0 - round 4 1449 aese v7.16b, v27.16b 1450 aesmc v7.16b, v7.16b //AES block 7 - round 4 1451 aese v4.16b, v26.16b 1452 aesmc v4.16b, v4.16b //AES block 4 - round 3 1453 1454 aese v6.16b, v27.16b 1455 aesmc v6.16b, v6.16b //AES block 6 - round 4 1456 aese v1.16b, v27.16b 1457 aesmc v1.16b, v1.16b //AES block 1 - round 4 1458 aese v3.16b, v27.16b 1459 aesmc v3.16b, v3.16b //AES block 3 - round 4 1460 1461 aese v5.16b, v27.16b 1462 aesmc v5.16b, v5.16b //AES block 5 - round 4 1463 aese v4.16b, v27.16b 1464 aesmc v4.16b, v4.16b //AES block 4 - round 4 1465 aese v2.16b, v27.16b 1466 aesmc v2.16b, v2.16b //AES block 2 - round 4 1467 1468 ldp q26, q27, [x8, #96] //load rk6, rk7 1469 aese v2.16b, v28.16b 1470 aesmc v2.16b, v2.16b //AES block 2 - round 5 1471 aese v3.16b, v28.16b 1472 aesmc v3.16b, v3.16b //AES block 3 - round 5 1473 1474 aese v6.16b, v28.16b 1475 aesmc v6.16b, v6.16b //AES block 6 - round 5 1476 aese v1.16b, v28.16b 1477 aesmc v1.16b, v1.16b //AES block 1 - round 5 1478 1479 aese v7.16b, v28.16b 1480 aesmc v7.16b, v7.16b //AES block 7 - round 5 1481 aese v5.16b, v28.16b 1482 aesmc v5.16b, v5.16b //AES block 5 - round 5 1483 1484 aese v4.16b, v28.16b 1485 aesmc v4.16b, v4.16b //AES block 4 - round 5 1486 1487 aese v3.16b, v26.16b 1488 aesmc v3.16b, v3.16b //AES block 3 - round 6 1489 aese v2.16b, v26.16b 1490 aesmc v2.16b, v2.16b //AES block 2 - round 6 1491 aese v0.16b, v28.16b 1492 aesmc v0.16b, v0.16b //AES block 0 - round 5 1493 1494 aese v5.16b, v26.16b 1495 aesmc v5.16b, v5.16b //AES block 5 - round 6 1496 aese v4.16b, v26.16b 1497 aesmc v4.16b, v4.16b //AES block 4 - round 6 1498 aese v1.16b, v26.16b 1499 aesmc v1.16b, v1.16b //AES block 1 - round 6 1500 1501 aese v0.16b, v26.16b 1502 aesmc v0.16b, v0.16b //AES block 0 - round 6 1503 aese v7.16b, v26.16b 1504 aesmc v7.16b, v7.16b //AES block 7 - round 6 1505 aese v6.16b, v26.16b 1506 aesmc v6.16b, v6.16b //AES block 6 - round 6 1507 1508 aese v3.16b, v27.16b 1509 aesmc v3.16b, v3.16b //AES 
block 3 - round 7 1510 aese v4.16b, v27.16b 1511 aesmc v4.16b, v4.16b //AES block 4 - round 7 1512 aese v1.16b, v27.16b 1513 aesmc v1.16b, v1.16b //AES block 1 - round 7 1514 1515 aese v7.16b, v27.16b 1516 aesmc v7.16b, v7.16b //AES block 7 - round 7 1517 aese v5.16b, v27.16b 1518 aesmc v5.16b, v5.16b //AES block 5 - round 7 1519 ldp q28, q26, [x8, #128] //load rk8, rk9 1520 1521 aese v6.16b, v27.16b 1522 aesmc v6.16b, v6.16b //AES block 6 - round 7 1523 aese v2.16b, v27.16b 1524 aesmc v2.16b, v2.16b //AES block 2 - round 7 1525 aese v0.16b, v27.16b 1526 aesmc v0.16b, v0.16b //AES block 0 - round 7 1527 1528 add x5, x5, x0 1529 add v30.4s, v30.4s, v31.4s //CTR block 7 1530 1531 aese v6.16b, v28.16b 1532 aesmc v6.16b, v6.16b //AES block 6 - round 8 1533 aese v0.16b, v28.16b 1534 aesmc v0.16b, v0.16b //AES block 0 - round 8 1535 1536 aese v1.16b, v28.16b 1537 aesmc v1.16b, v1.16b //AES block 1 - round 8 1538 aese v7.16b, v28.16b 1539 aesmc v7.16b, v7.16b //AES block 7 - round 8 1540 aese v3.16b, v28.16b 1541 aesmc v3.16b, v3.16b //AES block 3 - round 8 1542 1543 aese v5.16b, v28.16b 1544 aesmc v5.16b, v5.16b //AES block 5 - round 8 1545 aese v2.16b, v28.16b 1546 aesmc v2.16b, v2.16b //AES block 2 - round 8 1547 aese v4.16b, v28.16b 1548 aesmc v4.16b, v4.16b //AES block 4 - round 8 1549 1550 aese v0.16b, v26.16b //AES block 0 - round 9 1551 aese v1.16b, v26.16b //AES block 1 - round 9 1552 aese v6.16b, v26.16b //AES block 6 - round 9 1553 1554 ldr q27, [x8, #160] //load rk10 1555 aese v4.16b, v26.16b //AES block 4 - round 9 1556 aese v3.16b, v26.16b //AES block 3 - round 9 1557 1558 aese v2.16b, v26.16b //AES block 2 - round 9 1559 aese v5.16b, v26.16b //AES block 5 - round 9 1560 aese v7.16b, v26.16b //AES block 7 - round 9 1561 1562 add x4, x0, x1, lsr #3 //end_input_ptr 1563 cmp x0, x5 //check if we have <= 8 blocks 1564 b.ge .L128_dec_tail //handle tail 1565 1566 ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 1567 1568.inst 0xce006d00 //eor3 v0.16b, 
v8.16b, v0.16b, v27.16b //AES block 0 - result 1569.inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result 1570 stp q0, q1, [x2], #32 //AES block 0, 1 - store result 1571 1572 rev32 v0.16b, v30.16b //CTR block 8 1573 add v30.4s, v30.4s, v31.4s //CTR block 8 1574 ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 1575 1576 ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 1577 1578 rev32 v1.16b, v30.16b //CTR block 9 1579 add v30.4s, v30.4s, v31.4s //CTR block 9 1580 ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 1581 1582.inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result 1583.inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result 1584 stp q2, q3, [x2], #32 //AES block 2, 3 - store result 1585 1586 rev32 v2.16b, v30.16b //CTR block 10 1587 add v30.4s, v30.4s, v31.4s //CTR block 10 1588 1589.inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result 1590 1591 rev32 v3.16b, v30.16b //CTR block 11 1592 add v30.4s, v30.4s, v31.4s //CTR block 11 1593 1594.inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result 1595.inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result 1596 stp q4, q5, [x2], #32 //AES block 4, 5 - store result 1597 1598.inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result 1599 stp q6, q7, [x2], #32 //AES block 6, 7 - store result 1600 rev32 v4.16b, v30.16b //CTR block 12 1601 1602 cmp x0, x5 //check if we have <= 8 blocks 1603 add v30.4s, v30.4s, v31.4s //CTR block 12 1604 b.ge .L128_dec_prepretail //do prepretail 1605 1606.L128_dec_main_loop: //main loop start 1607 ldr q23, [x3, #176] //load h7l | h7h 1608 ext v23.16b, v23.16b, v23.16b, #8 1609 ldr q25, [x3, #208] //load h8l | h8h 1610 ext v25.16b, v25.16b, v25.16b, #8 1611 1612 rev64 v9.16b, v9.16b //GHASH block 8k+1 1613 rev64 v8.16b, v8.16b //GHASH block 8k 1614 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 
1615 1616 rev64 v14.16b, v14.16b //GHASH block 8k+6 1617 ldr q20, [x3, #128] //load h5l | h5h 1618 ext v20.16b, v20.16b, v20.16b, #8 1619 ldr q22, [x3, #160] //load h6l | h6h 1620 ext v22.16b, v22.16b, v22.16b, #8 1621 1622 eor v8.16b, v8.16b, v19.16b //PRE 1 1623 rev32 v5.16b, v30.16b //CTR block 8k+13 1624 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 1625 1626 rev64 v10.16b, v10.16b //GHASH block 8k+2 1627 rev64 v12.16b, v12.16b //GHASH block 8k+4 1628 ldp q26, q27, [x8, #0] //load rk0, rk1 1629 1630 rev32 v6.16b, v30.16b //CTR block 8k+14 1631 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 1632 ldr q21, [x3, #144] //load h6k | h5k 1633 ldr q24, [x3, #192] //load h8k | h7k 1634 1635 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 1636 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 1637 rev64 v11.16b, v11.16b //GHASH block 8k+3 1638 1639 rev32 v7.16b, v30.16b //CTR block 8k+15 1640 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1641 rev64 v13.16b, v13.16b //GHASH block 8k+5 1642 1643 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 1644 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 1645 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1646 1647 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 1648 aese v4.16b, v26.16b 1649 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 1650 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 1651 1652 aese v6.16b, v26.16b 1653 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 1654 aese v5.16b, v26.16b 1655 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 1656 aese v7.16b, v26.16b 1657 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 1658 1659 aese v3.16b, v26.16b 1660 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 1661 aese v2.16b, v26.16b 1662 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 1663 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 1664 1665 aese v1.16b, v26.16b 1666 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 1667 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 
8k+1 - mid 1668 aese v0.16b, v26.16b 1669 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 1670 1671 aese v2.16b, v27.16b 1672 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 1673 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 1674.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 1675 1676 ldp q28, q26, [x8, #32] //load rk2, rk3 1677 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 1678 aese v7.16b, v27.16b 1679 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 1680 1681 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 1682 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 1683 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 1684 1685 ldr q23, [x3, #80] //load h3l | h3h 1686 ext v23.16b, v23.16b, v23.16b, #8 1687 ldr q25, [x3, #112] //load h4l | h4h 1688 ext v25.16b, v25.16b, v25.16b, #8 1689 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 1690 aese v6.16b, v27.16b 1691 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 1692 1693 aese v4.16b, v27.16b 1694 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 1695 aese v5.16b, v27.16b 1696 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 1697 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 1698 1699 aese v3.16b, v27.16b 1700 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 1701 aese v0.16b, v27.16b 1702 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 1703 aese v1.16b, v27.16b 1704 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 1705 1706 aese v7.16b, v28.16b 1707 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 1708 aese v2.16b, v28.16b 1709 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 1710.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 1711 1712 aese v4.16b, v28.16b 1713 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 1714 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 1715 ldr q20, [x3, #32] //load h1l | h1h 1716 ext v20.16b, v20.16b, v20.16b, #8 1717 ldr q22, [x3, #64] //load 
h2l | h2h 1718 ext v22.16b, v22.16b, v22.16b, #8 1719 1720 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 1721 aese v1.16b, v28.16b 1722 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 1723 aese v3.16b, v28.16b 1724 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 1725 1726 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 1727 aese v5.16b, v28.16b 1728 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 1729 aese v0.16b, v28.16b 1730 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 1731 1732 aese v6.16b, v28.16b 1733 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 1734 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 1735 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 1736 1737 aese v7.16b, v26.16b 1738 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 1739 rev64 v15.16b, v15.16b //GHASH block 8k+7 1740 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 1741 1742 ldp q27, q28, [x8, #64] //load rk4, rk5 1743 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 1744.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 1745 1746 ldr q21, [x3, #48] //load h2k | h1k 1747 ldr q24, [x3, #96] //load h4k | h3k 1748 aese v2.16b, v26.16b 1749 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 1750 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 1751 1752 aese v4.16b, v26.16b 1753 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 1754 aese v3.16b, v26.16b 1755 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 1756 aese v1.16b, v26.16b 1757 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 1758 1759 aese v0.16b, v26.16b 1760 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 1761 aese v6.16b, v26.16b 1762 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 1763 aese v5.16b, v26.16b 1764 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 1765 1766 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 1767 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 1768 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - 
high 1769 1770 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 1771 aese v0.16b, v27.16b 1772 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 1773 aese v7.16b, v27.16b 1774 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 1775 1776 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 1777 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 1778 aese v3.16b, v27.16b 1779 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 1780 1781 aese v1.16b, v27.16b 1782 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 1783 aese v5.16b, v27.16b 1784 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 1785 aese v6.16b, v27.16b 1786 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 1787 1788 aese v2.16b, v27.16b 1789 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 1790 aese v4.16b, v27.16b 1791 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 1792 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 1793 1794 ldp q26, q27, [x8, #96] //load rk6, rk7 1795 aese v0.16b, v28.16b 1796 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 1797 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 1798 1799 aese v2.16b, v28.16b 1800 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 1801 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 1802 aese v1.16b, v28.16b 1803 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 1804 1805 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 1806 aese v6.16b, v28.16b 1807 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 1808 aese v7.16b, v28.16b 1809 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 1810 1811 aese v3.16b, v28.16b 1812 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 1813 aese v5.16b, v28.16b 1814 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 1815 aese v4.16b, v28.16b 1816 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 1817 1818 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 1819.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 1820.inst 0xce195e73 //eor3 
v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 1821 1822 aese v3.16b, v26.16b 1823 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 1824.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 1825 aese v7.16b, v26.16b 1826 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 1827 1828 aese v1.16b, v26.16b 1829 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 1830 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 1831 aese v6.16b, v26.16b 1832 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 1833 1834 aese v2.16b, v26.16b 1835 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 1836 aese v5.16b, v26.16b 1837 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 1838 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 1839 1840 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 1841 aese v0.16b, v26.16b 1842 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 1843 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 1844 1845.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 1846 aese v4.16b, v26.16b 1847 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 1848 ldp q28, q26, [x8, #128] //load rk8, rk9 1849 1850 ldr d16, [x10] //MODULO - load modulo constant 1851.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 1852 aese v5.16b, v27.16b 1853 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 1854 1855 rev32 v20.16b, v30.16b //CTR block 8k+16 1856.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 1857 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 1858 1859 aese v6.16b, v27.16b 1860 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 1861 aese v3.16b, v27.16b 1862 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 1863 aese v7.16b, v27.16b 1864 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 1865 1866 aese v2.16b, v27.16b 1867 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 1868 aese v1.16b, v27.16b 1869 aesmc v1.16b, v1.16b 
//AES block 8k+9 - round 7 1870 rev32 v22.16b, v30.16b //CTR block 8k+17 1871 1872 aese v4.16b, v27.16b 1873 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 1874 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 1875 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 1876 1877.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 1878 aese v0.16b, v27.16b 1879 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 1880 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 1881 1882 aese v5.16b, v28.16b 1883 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 1884 aese v1.16b, v28.16b 1885 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 1886 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 1887 1888 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 1889 aese v0.16b, v28.16b 1890 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 1891 rev32 v23.16b, v30.16b //CTR block 8k+18 1892 1893 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 1894 aese v4.16b, v28.16b 1895 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 1896.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 1897 1898 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 1899 aese v3.16b, v28.16b 1900 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 1901 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 1902 1903 aese v7.16b, v28.16b 1904 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 1905 aese v2.16b, v28.16b 1906 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 1907 aese v6.16b, v28.16b 1908 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 1909 1910 aese v0.16b, v26.16b //AES block 8k+8 - round 9 1911 aese v1.16b, v26.16b //AES block 8k+9 - round 9 1912 ldr q27, [x8, #160] //load rk10 1913 1914 aese v6.16b, v26.16b //AES block 8k+14 - round 9 1915 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 1916 aese v2.16b, v26.16b //AES block 8k+10 - round 9 1917 1918 aese 
v7.16b, v26.16b //AES block 8k+15 - round 9 1919 aese v4.16b, v26.16b //AES block 8k+12 - round 9 1920 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 1921 1922 rev32 v25.16b, v30.16b //CTR block 8k+19 1923 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 1924 1925 aese v3.16b, v26.16b //AES block 8k+11 - round 9 1926 aese v5.16b, v26.16b //AES block 8k+13 - round 9 1927.inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result 1928 1929.inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result 1930.inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 8k+15 - result 1931.inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 8k+14 - result 1932 1933.inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result 1934 stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 1935 mov v1.16b, v22.16b //CTR block 8k+17 1936 1937.inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 8k+12 - result 1938.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 1939 mov v0.16b, v20.16b //CTR block 8k+16 1940 1941.inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result 1942 cmp x0, x5 //.LOOP CONTROL 1943 stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 1944 1945.inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 8k+13 - result 1946 mov v2.16b, v23.16b //CTR block 8k+18 1947 1948 stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 1949 rev32 v4.16b, v30.16b //CTR block 8k+20 1950 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 1951 1952 stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 1953 mov v3.16b, v25.16b //CTR block 8k+19 1954 b.lt .L128_dec_main_loop 1955 1956.L128_dec_prepretail: //PREPRETAIL 1957 rev64 v11.16b, v11.16b //GHASH block 8k+3 1958 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 1959 rev64 v8.16b, v8.16b //GHASH block 8k 1960 1961 rev64 
v10.16b, v10.16b //GHASH block 8k+2 1962 rev32 v5.16b, v30.16b //CTR block 8k+13 1963 ldp q26, q27, [x8, #0] //load rk0, rk1 1964 1965 ldr q23, [x3, #176] //load h7l | h7h 1966 ext v23.16b, v23.16b, v23.16b, #8 1967 ldr q25, [x3, #208] //load h8l | h8h 1968 ext v25.16b, v25.16b, v25.16b, #8 1969 eor v8.16b, v8.16b, v19.16b //PRE 1 1970 rev64 v9.16b, v9.16b //GHASH block 8k+1 1971 1972 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 1973 ldr q20, [x3, #128] //load h5l | h5h 1974 ext v20.16b, v20.16b, v20.16b, #8 1975 ldr q22, [x3, #160] //load h6l | h6h 1976 ext v22.16b, v22.16b, v22.16b, #8 1977 rev64 v13.16b, v13.16b //GHASH block 8k+5 1978 1979 rev64 v12.16b, v12.16b //GHASH block 8k+4 1980 1981 rev64 v14.16b, v14.16b //GHASH block 8k+6 1982 1983 ldr q21, [x3, #144] //load h6k | h5k 1984 ldr q24, [x3, #192] //load h8k | h7k 1985 rev32 v6.16b, v30.16b //CTR block 8k+14 1986 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 1987 1988 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 1989 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 1990 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 1991 1992 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1993 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 1994 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 1995 1996 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 1997 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 1998 aese v0.16b, v26.16b 1999 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 2000 2001 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 2002 aese v4.16b, v26.16b 2003 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 2004 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 2005 2006 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 2007 rev32 v7.16b, v30.16b //CTR block 8k+15 2008 aese v3.16b, v26.16b 2009 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 2010 2011.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 2012 
trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 2013 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 2014 2015 aese v2.16b, v26.16b 2016 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 2017 aese v1.16b, v26.16b 2018 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 2019 aese v5.16b, v26.16b 2020 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 2021 2022 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 2023 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 2024 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 2025 2026 aese v2.16b, v27.16b 2027 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 2028 aese v7.16b, v26.16b 2029 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 2030 aese v6.16b, v26.16b 2031 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 2032 2033 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 2034 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 2035 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 2036 2037 aese v6.16b, v27.16b 2038 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 2039 aese v4.16b, v27.16b 2040 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 2041 aese v5.16b, v27.16b 2042 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 2043 2044 ldp q28, q26, [x8, #32] //load rk2, rk3 2045.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 2046 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 2047 2048 ldr q23, [x3, #80] //load h3l | h3h 2049 ext v23.16b, v23.16b, v23.16b, #8 2050 ldr q25, [x3, #112] //load h4l | h4h 2051 ext v25.16b, v25.16b, v25.16b, #8 2052 aese v1.16b, v27.16b 2053 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 2054 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 2055 2056 aese v3.16b, v27.16b 2057 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 2058 aese v7.16b, v27.16b 2059 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 2060 aese v0.16b, v27.16b 2061 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 2062 2063 ldr q20, 
[x3, #32] //load h1l | h1h 2064 ext v20.16b, v20.16b, v20.16b, #8 2065 ldr q22, [x3, #64] //load h2l | h2h 2066 ext v22.16b, v22.16b, v22.16b, #8 2067.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 2068 2069 aese v0.16b, v28.16b 2070 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 2071 aese v6.16b, v28.16b 2072 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 2073 aese v2.16b, v28.16b 2074 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 2075 2076 aese v4.16b, v28.16b 2077 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 2078 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 2079 aese v7.16b, v28.16b 2080 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 2081 2082 aese v1.16b, v28.16b 2083 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 2084 aese v5.16b, v28.16b 2085 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 2086 aese v3.16b, v28.16b 2087 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 2088 2089 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 2090 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 2091 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 2092 2093 ldp q27, q28, [x8, #64] //load rk4, rk5 2094 rev64 v15.16b, v15.16b //GHASH block 8k+7 2095 aese v6.16b, v26.16b 2096 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 2097 2098 ldr q21, [x3, #48] //load h2k | h1k 2099 ldr q24, [x3, #96] //load h4k | h3k 2100 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 2101 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 2102 2103 aese v2.16b, v26.16b 2104 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 2105 aese v0.16b, v26.16b 2106 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 2107 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 2108 2109 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 2110 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 2111 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 2112 2113 aese v4.16b, v26.16b 2114 aesmc v4.16b, 
v4.16b //AES block 8k+12 - round 3 2115 aese v3.16b, v26.16b 2116 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 2117 aese v7.16b, v26.16b 2118 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 2119 2120 aese v1.16b, v26.16b 2121 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 2122 aese v5.16b, v26.16b 2123 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 2124 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 2125 2126.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 2127 aese v0.16b, v27.16b 2128 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 2129 aese v2.16b, v27.16b 2130 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 2131 2132 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 2133 aese v5.16b, v27.16b 2134 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 2135 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 2136 2137 aese v1.16b, v27.16b 2138 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 2139 aese v6.16b, v27.16b 2140 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 2141 aese v4.16b, v27.16b 2142 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 2143 2144 aese v7.16b, v27.16b 2145 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 2146 aese v3.16b, v27.16b 2147 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 2148 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 2149 2150 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 2151 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 2152 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 2153 2154 ldp q26, q27, [x8, #96] //load rk6, rk7 2155.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 2156 aese v6.16b, v28.16b 2157 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 2158 2159 ldr d16, [x10] //MODULO - load modulo constant 2160 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 2161.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 2162 2163 aese 
v0.16b, v28.16b 2164 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 2165 aese v2.16b, v28.16b 2166 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 2167 aese v4.16b, v28.16b 2168 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 2169 2170 aese v3.16b, v28.16b 2171 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 2172 aese v1.16b, v28.16b 2173 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 2174 aese v5.16b, v28.16b 2175 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 2176 2177 aese v7.16b, v28.16b 2178 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 2179.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 2180.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 2181 2182 aese v4.16b, v26.16b 2183 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 2184 aese v1.16b, v26.16b 2185 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 2186 aese v2.16b, v26.16b 2187 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 2188 2189.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 2190 aese v5.16b, v26.16b 2191 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 2192 aese v0.16b, v26.16b 2193 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 2194 2195 aese v3.16b, v26.16b 2196 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 2197 aese v6.16b, v26.16b 2198 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 2199 aese v7.16b, v26.16b 2200 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 2201 2202 aese v4.16b, v27.16b 2203 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 2204.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 2205 ldp q28, q26, [x8, #128] //load rk8, rk9 2206 2207 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 2208 aese v3.16b, v27.16b 2209 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 2210 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 2211 2212 aese v5.16b, v27.16b 2213 aesmc v5.16b, v5.16b 
//AES block 8k+13 - round 7 2214 aese v6.16b, v27.16b 2215 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 2216 aese v0.16b, v27.16b 2217 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 2218 2219 aese v7.16b, v27.16b 2220 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 2221 aese v1.16b, v27.16b 2222 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 2223 aese v2.16b, v27.16b 2224 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 2225 2226.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 2227 ldr q27, [x8, #160] //load rk10 2228 2229 aese v3.16b, v28.16b 2230 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 2231 aese v0.16b, v28.16b 2232 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 2233 2234 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 2235 aese v6.16b, v28.16b 2236 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 2237 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 2238 2239 aese v2.16b, v28.16b 2240 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 2241 aese v1.16b, v28.16b 2242 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 2243 aese v7.16b, v28.16b 2244 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 2245 2246 aese v6.16b, v26.16b //AES block 8k+14 - round 9 2247 aese v5.16b, v28.16b 2248 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 2249 aese v4.16b, v28.16b 2250 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 2251 2252.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 2253 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 2254 aese v2.16b, v26.16b //AES block 8k+10 - round 9 2255 2256 aese v3.16b, v26.16b //AES block 8k+11 - round 9 2257 aese v5.16b, v26.16b //AES block 8k+13 - round 9 2258 aese v0.16b, v26.16b //AES block 8k+8 - round 9 2259 2260 aese v4.16b, v26.16b //AES block 8k+12 - round 9 2261 aese v1.16b, v26.16b //AES block 8k+9 - round 9 2262 aese v7.16b, v26.16b //AES block 8k+15 - round 9 2263 2264.L128_dec_tail: //TAIL 2265 2266 mov v29.16b, 
v27.16b 2267 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 2268 2269 cmp x5, #112 2270 2271 ldp q24, q25, [x3, #192] //load h8k | h7k 2272 ext v25.16b, v25.16b, v25.16b, #8 2273 ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 2274 2275 ldp q20, q21, [x3, #128] //load h5l | h5h 2276 ext v20.16b, v20.16b, v20.16b, #8 2277 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 2278 2279 ldp q22, q23, [x3, #160] //load h6l | h6h 2280 ext v22.16b, v22.16b, v22.16b, #8 2281 ext v23.16b, v23.16b, v23.16b, #8 2282 2283.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 2284 b.gt .L128_dec_blocks_more_than_7 2285 2286 cmp x5, #96 2287 mov v7.16b, v6.16b 2288 movi v19.8b, #0 2289 2290 movi v17.8b, #0 2291 mov v6.16b, v5.16b 2292 mov v5.16b, v4.16b 2293 2294 mov v4.16b, v3.16b 2295 mov v3.16b, v2.16b 2296 mov v2.16b, v1.16b 2297 2298 movi v18.8b, #0 2299 sub v30.4s, v30.4s, v31.4s 2300 b.gt .L128_dec_blocks_more_than_6 2301 2302 cmp x5, #80 2303 sub v30.4s, v30.4s, v31.4s 2304 2305 mov v7.16b, v6.16b 2306 mov v6.16b, v5.16b 2307 mov v5.16b, v4.16b 2308 2309 mov v4.16b, v3.16b 2310 mov v3.16b, v1.16b 2311 b.gt .L128_dec_blocks_more_than_5 2312 2313 cmp x5, #64 2314 2315 mov v7.16b, v6.16b 2316 mov v6.16b, v5.16b 2317 mov v5.16b, v4.16b 2318 2319 mov v4.16b, v1.16b 2320 sub v30.4s, v30.4s, v31.4s 2321 b.gt .L128_dec_blocks_more_than_4 2322 2323 sub v30.4s, v30.4s, v31.4s 2324 mov v7.16b, v6.16b 2325 mov v6.16b, v5.16b 2326 2327 mov v5.16b, v1.16b 2328 cmp x5, #48 2329 b.gt .L128_dec_blocks_more_than_3 2330 2331 sub v30.4s, v30.4s, v31.4s 2332 mov v7.16b, v6.16b 2333 cmp x5, #32 2334 2335 ldr q24, [x3, #96] //load h4k | h3k 2336 mov v6.16b, v1.16b 2337 b.gt .L128_dec_blocks_more_than_2 2338 2339 cmp x5, #16 2340 2341 mov v7.16b, v1.16b 2342 sub v30.4s, v30.4s, v31.4s 2343 b.gt .L128_dec_blocks_more_than_1 2344 2345 sub v30.4s, v30.4s, v31.4s 2346 ldr q21, [x3, #48] //load h2k | h1k 2347 b 
.L128_dec_blocks_less_than_1 2348.L128_dec_blocks_more_than_7: //blocks left > 7 2349 rev64 v8.16b, v9.16b //GHASH final-7 block 2350 2351 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2352 2353 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 2354 2355 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 2356 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 2357 2358 movi v16.8b, #0 //suppress further partial tag feed in 2359 ldr q9, [x0], #16 //AES final-6 block - load ciphertext 2360 2361 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 2362 2363 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 2364 st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 2365.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 2366 2367 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 2368.L128_dec_blocks_more_than_6: //blocks left > 6 2369 2370 rev64 v8.16b, v9.16b //GHASH final-6 block 2371 2372 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2373 2374 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 2375 2376 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 2377 2378 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 2379 ldr q9, [x0], #16 //AES final-5 block - load ciphertext 2380 movi v16.8b, #0 //suppress further partial tag feed in 2381 2382 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 2383 st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 2384 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 2385 2386 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 2387 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 2388 2389 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 2390.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 2391.L128_dec_blocks_more_than_5: //blocks left > 5 2392 2393 rev64 v8.16b, v9.16b //GHASH final-5 block 2394 2395 ldr q9, [x0], #16 //AES final-4 block - load ciphertext 2396 st1 { 
v12.16b}, [x2], #16 //AES final-5 block - store result 2397 2398 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2399 2400 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 2401 2402.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 2403 2404 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 2405 2406 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 2407 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 2408 movi v16.8b, #0 //suppress further partial tag feed in 2409 2410 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 2411 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 2412 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 2413 2414 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 2415 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 2416.L128_dec_blocks_more_than_4: //blocks left > 4 2417 2418 rev64 v8.16b, v9.16b //GHASH final-4 block 2419 2420 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2421 ldr q9, [x0], #16 //AES final-3 block - load ciphertext 2422 2423 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 2424 movi v16.8b, #0 //suppress further partial tag feed in 2425 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 2426 2427 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 2428 2429 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 2430 2431 st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 2432 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 2433 2434.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 2435 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 2436 2437 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 2438 2439 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 2440.L128_dec_blocks_more_than_3: //blocks left > 3 2441 2442 st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 2443 rev64 v8.16b, v9.16b //GHASH final-3 block 2444 2445 eor 
v8.16b, v8.16b, v16.16b //feed in partial tag 2446 2447 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 2448 2449 ldr q25, [x3, #112] //load h4l | h4h 2450 ext v25.16b, v25.16b, v25.16b, #8 2451 ldr q24, [x3, #96] //load h4k | h3k 2452 2453 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 2454 2455 ldr q9, [x0], #16 //AES final-2 block - load ciphertext 2456 2457 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 2458 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 2459 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 2460 2461 movi v16.8b, #0 //suppress further partial tag feed in 2462.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 2463 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 2464 2465 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 2466 2467 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 2468 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 2469.L128_dec_blocks_more_than_2: //blocks left > 2 2470 2471 rev64 v8.16b, v9.16b //GHASH final-2 block 2472 2473 st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 2474 2475 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2476 ldr q23, [x3, #80] //load h3l | h3h 2477 ext v23.16b, v23.16b, v23.16b, #8 2478 movi v16.8b, #0 //suppress further partial tag feed in 2479 2480 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 2481 2482 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 2483 2484 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 2485 2486 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 2487 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 2488 ldr q9, [x0], #16 //AES final-1 block - load ciphertext 2489 2490 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 2491 2492 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 2493 2494.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 2495 eor v17.16b, v17.16b, v28.16b //GHASH 
final-2 block - high 2496.L128_dec_blocks_more_than_1: //blocks left > 1 2497 2498 st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 2499 rev64 v8.16b, v9.16b //GHASH final-1 block 2500 2501 ldr q22, [x3, #64] //load h2l | h2h 2502 ext v22.16b, v22.16b, v22.16b, #8 2503 2504 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2505 2506 movi v16.8b, #0 //suppress further partial tag feed in 2507 2508 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 2509 2510 ldr q9, [x0], #16 //AES final block - load ciphertext 2511 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 2512 2513 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 2514 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 2515 ldr q21, [x3, #48] //load h2k | h1k 2516 2517 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 2518.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 2519 2520 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 2521 2522 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 2523 2524 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 2525 2526 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 2527.L128_dec_blocks_less_than_1: //blocks left <= 1 2528 2529 and x1, x1, #127 //bit_length %= 128 2530 2531 sub x1, x1, #128 //bit_length -= 128 2532 2533 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 2534 2535 mvn x6, xzr //temp0_x = 0xffffffffffffffff 2536 and x1, x1, #127 //bit_length %= 128 2537 2538 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 2539 cmp x1, #64 2540 mvn x7, xzr //temp1_x = 0xffffffffffffffff 2541 2542 csel x13, x7, x6, lt 2543 csel x14, x6, xzr, lt 2544 2545 mov v0.d[1], x14 2546 mov v0.d[0], x13 //ctr0b is mask for last block 2547 2548 ldr q20, [x3, #32] //load h1l | h1h 2549 ext v20.16b, v20.16b, v20.16b, #8 2550 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 2551 2552 and v9.16b, v9.16b, v0.16b 
//possibly partial last block has zeroes in highest bits 2553 2554 rev64 v8.16b, v9.16b //GHASH final block 2555 2556 eor v8.16b, v8.16b, v16.16b //feed in partial tag 2557 2558 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 2559 ins v16.d[0], v8.d[1] //GHASH final block - mid 2560 2561 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 2562 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 2563 2564 bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 2565 2566 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 2567 st1 { v12.16b}, [x2] //store all 16B 2568 2569 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 2570 2571 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 2572 ldr d16, [x10] //MODULO - load modulo constant 2573 2574 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 2575 2576 eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 2577 2578 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 2579 ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 2580 2581 eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 2582 2583.inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid 2584 2585 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 2586 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 2587 2588.inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low 2589 ext v19.16b, v19.16b, v19.16b, #8 2590 rev64 v19.16b, v19.16b 2591 st1 { v19.16b }, [x3] 2592 rev32 v30.16b, v30.16b 2593 2594 str q30, [x16] //store the updated counter 2595 2596 mov x0, x9 2597 2598 ldp d10, d11, [sp, #16] 2599 ldp d12, d13, [sp, #32] 2600 ldp d14, d15, [sp, #48] 2601 ldp d8, d9, [sp], #80 2602 ret 2603.L128_dec_ret: 2604 mov w0, #0x0 2605 ret 2606.size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel 2607.globl unroll8_eor3_aes_gcm_enc_192_kernel 2608.type 
unroll8_eor3_aes_gcm_enc_192_kernel,%function 2609.align 4 2610unroll8_eor3_aes_gcm_enc_192_kernel: 2611 AARCH64_VALID_CALL_TARGET 2612 cbz x1, .L192_enc_ret 2613 stp d8, d9, [sp, #-80]! 2614 lsr x9, x1, #3 2615 mov x16, x4 2616 mov x8, x5 2617 stp d10, d11, [sp, #16] 2618 stp d12, d13, [sp, #32] 2619 stp d14, d15, [sp, #48] 2620 mov x5, #0xc200000000000000 2621 stp x5, xzr, [sp, #64] 2622 add x10, sp, #64 2623 2624 mov x5, x9 2625 ld1 { v0.16b}, [x16] //CTR block 0 2626 2627 mov x15, #0x100000000 //set up counter increment 2628 movi v31.16b, #0x0 2629 mov v31.d[1], x15 2630 2631 rev32 v30.16b, v0.16b //set up reversed counter 2632 2633 add v30.4s, v30.4s, v31.4s //CTR block 0 2634 2635 rev32 v1.16b, v30.16b //CTR block 1 2636 add v30.4s, v30.4s, v31.4s //CTR block 1 2637 2638 rev32 v2.16b, v30.16b //CTR block 2 2639 add v30.4s, v30.4s, v31.4s //CTR block 2 2640 2641 rev32 v3.16b, v30.16b //CTR block 3 2642 add v30.4s, v30.4s, v31.4s //CTR block 3 2643 2644 rev32 v4.16b, v30.16b //CTR block 4 2645 add v30.4s, v30.4s, v31.4s //CTR block 4 2646 sub x5, x5, #1 //byte_len - 1 2647 2648 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 2649 2650 rev32 v5.16b, v30.16b //CTR block 5 2651 add v30.4s, v30.4s, v31.4s //CTR block 5 2652 ldp q26, q27, [x8, #0] //load rk0, rk1 2653 2654 add x5, x5, x0 2655 2656 rev32 v6.16b, v30.16b //CTR block 6 2657 add v30.4s, v30.4s, v31.4s //CTR block 6 2658 2659 rev32 v7.16b, v30.16b //CTR block 7 2660 2661 aese v5.16b, v26.16b 2662 aesmc v5.16b, v5.16b //AES block 5 - round 0 2663 aese v4.16b, v26.16b 2664 aesmc v4.16b, v4.16b //AES block 4 - round 0 2665 aese v3.16b, v26.16b 2666 aesmc v3.16b, v3.16b //AES block 3 - round 0 2667 2668 aese v0.16b, v26.16b 2669 aesmc v0.16b, v0.16b //AES block 0 - round 0 2670 aese v1.16b, v26.16b 2671 aesmc v1.16b, v1.16b //AES block 1 - round 0 2672 aese v7.16b, v26.16b 2673 aesmc v7.16b, v7.16b //AES block 7 - round 0 2674 2675 aese 
v6.16b, v26.16b 2676 aesmc v6.16b, v6.16b //AES block 6 - round 0 2677 aese v2.16b, v26.16b 2678 aesmc v2.16b, v2.16b //AES block 2 - round 0 2679 ldp q28, q26, [x8, #32] //load rk2, rk3 2680 2681 aese v5.16b, v27.16b 2682 aesmc v5.16b, v5.16b //AES block 5 - round 1 2683 aese v7.16b, v27.16b 2684 aesmc v7.16b, v7.16b //AES block 7 - round 1 2685 2686 aese v2.16b, v27.16b 2687 aesmc v2.16b, v2.16b //AES block 2 - round 1 2688 aese v3.16b, v27.16b 2689 aesmc v3.16b, v3.16b //AES block 3 - round 1 2690 aese v6.16b, v27.16b 2691 aesmc v6.16b, v6.16b //AES block 6 - round 1 2692 2693 aese v5.16b, v28.16b 2694 aesmc v5.16b, v5.16b //AES block 5 - round 2 2695 aese v4.16b, v27.16b 2696 aesmc v4.16b, v4.16b //AES block 4 - round 1 2697 aese v0.16b, v27.16b 2698 aesmc v0.16b, v0.16b //AES block 0 - round 1 2699 2700 aese v1.16b, v27.16b 2701 aesmc v1.16b, v1.16b //AES block 1 - round 1 2702 aese v7.16b, v28.16b 2703 aesmc v7.16b, v7.16b //AES block 7 - round 2 2704 aese v3.16b, v28.16b 2705 aesmc v3.16b, v3.16b //AES block 3 - round 2 2706 2707 aese v2.16b, v28.16b 2708 aesmc v2.16b, v2.16b //AES block 2 - round 2 2709 aese v0.16b, v28.16b 2710 aesmc v0.16b, v0.16b //AES block 0 - round 2 2711 2712 aese v1.16b, v28.16b 2713 aesmc v1.16b, v1.16b //AES block 1 - round 2 2714 aese v4.16b, v28.16b 2715 aesmc v4.16b, v4.16b //AES block 4 - round 2 2716 aese v6.16b, v28.16b 2717 aesmc v6.16b, v6.16b //AES block 6 - round 2 2718 2719 ldp q27, q28, [x8, #64] //load rk4, rk5 2720 aese v4.16b, v26.16b 2721 aesmc v4.16b, v4.16b //AES block 4 - round 3 2722 2723 aese v7.16b, v26.16b 2724 aesmc v7.16b, v7.16b //AES block 7 - round 3 2725 aese v3.16b, v26.16b 2726 aesmc v3.16b, v3.16b //AES block 3 - round 3 2727 aese v2.16b, v26.16b 2728 aesmc v2.16b, v2.16b //AES block 2 - round 3 2729 2730 aese v1.16b, v26.16b 2731 aesmc v1.16b, v1.16b //AES block 1 - round 3 2732 2733 aese v0.16b, v26.16b 2734 aesmc v0.16b, v0.16b //AES block 0 - round 3 2735 2736 aese v6.16b, v26.16b 2737 aesmc 
v6.16b, v6.16b //AES block 6 - round 3 2738 2739 aese v0.16b, v27.16b 2740 aesmc v0.16b, v0.16b //AES block 0 - round 4 2741 aese v1.16b, v27.16b 2742 aesmc v1.16b, v1.16b //AES block 1 - round 4 2743 aese v5.16b, v26.16b 2744 aesmc v5.16b, v5.16b //AES block 5 - round 3 2745 2746 aese v3.16b, v27.16b 2747 aesmc v3.16b, v3.16b //AES block 3 - round 4 2748 aese v2.16b, v27.16b 2749 aesmc v2.16b, v2.16b //AES block 2 - round 4 2750 aese v4.16b, v27.16b 2751 aesmc v4.16b, v4.16b //AES block 4 - round 4 2752 2753 aese v6.16b, v27.16b 2754 aesmc v6.16b, v6.16b //AES block 6 - round 4 2755 aese v7.16b, v27.16b 2756 aesmc v7.16b, v7.16b //AES block 7 - round 4 2757 aese v5.16b, v27.16b 2758 aesmc v5.16b, v5.16b //AES block 5 - round 4 2759 2760 aese v1.16b, v28.16b 2761 aesmc v1.16b, v1.16b //AES block 1 - round 5 2762 ldp q26, q27, [x8, #96] //load rk6, rk7 2763 aese v2.16b, v28.16b 2764 aesmc v2.16b, v2.16b //AES block 2 - round 5 2765 2766 aese v4.16b, v28.16b 2767 aesmc v4.16b, v4.16b //AES block 4 - round 5 2768 aese v7.16b, v28.16b 2769 aesmc v7.16b, v7.16b //AES block 7 - round 5 2770 aese v0.16b, v28.16b 2771 aesmc v0.16b, v0.16b //AES block 0 - round 5 2772 2773 aese v5.16b, v28.16b 2774 aesmc v5.16b, v5.16b //AES block 5 - round 5 2775 aese v6.16b, v28.16b 2776 aesmc v6.16b, v6.16b //AES block 6 - round 5 2777 aese v3.16b, v28.16b 2778 aesmc v3.16b, v3.16b //AES block 3 - round 5 2779 2780 add v30.4s, v30.4s, v31.4s //CTR block 7 2781 2782 aese v5.16b, v26.16b 2783 aesmc v5.16b, v5.16b //AES block 5 - round 6 2784 aese v4.16b, v26.16b 2785 aesmc v4.16b, v4.16b //AES block 4 - round 6 2786 aese v3.16b, v26.16b 2787 aesmc v3.16b, v3.16b //AES block 3 - round 6 2788 2789 aese v2.16b, v26.16b 2790 aesmc v2.16b, v2.16b //AES block 2 - round 6 2791 aese v6.16b, v26.16b 2792 aesmc v6.16b, v6.16b //AES block 6 - round 6 2793 aese v1.16b, v26.16b 2794 aesmc v1.16b, v1.16b //AES block 1 - round 6 2795 2796 aese v0.16b, v26.16b 2797 aesmc v0.16b, v0.16b //AES block 0 - 
round 6 2798 aese v7.16b, v26.16b 2799 aesmc v7.16b, v7.16b //AES block 7 - round 6 2800 ldp q28, q26, [x8, #128] //load rk8, rk9 2801 2802 aese v6.16b, v27.16b 2803 aesmc v6.16b, v6.16b //AES block 6 - round 7 2804 aese v3.16b, v27.16b 2805 aesmc v3.16b, v3.16b //AES block 3 - round 7 2806 2807 aese v4.16b, v27.16b 2808 aesmc v4.16b, v4.16b //AES block 4 - round 7 2809 aese v0.16b, v27.16b 2810 aesmc v0.16b, v0.16b //AES block 0 - round 7 2811 2812 aese v7.16b, v27.16b 2813 aesmc v7.16b, v7.16b //AES block 7 - round 7 2814 aese v1.16b, v27.16b 2815 aesmc v1.16b, v1.16b //AES block 1 - round 7 2816 2817 aese v2.16b, v27.16b 2818 aesmc v2.16b, v2.16b //AES block 2 - round 7 2819 aese v5.16b, v27.16b 2820 aesmc v5.16b, v5.16b //AES block 5 - round 7 2821 2822 aese v7.16b, v28.16b 2823 aesmc v7.16b, v7.16b //AES block 7 - round 8 2824 aese v0.16b, v28.16b 2825 aesmc v0.16b, v0.16b //AES block 0 - round 8 2826 2827 aese v4.16b, v28.16b 2828 aesmc v4.16b, v4.16b //AES block 4 - round 8 2829 aese v3.16b, v28.16b 2830 aesmc v3.16b, v3.16b //AES block 3 - round 8 2831 aese v5.16b, v28.16b 2832 aesmc v5.16b, v5.16b //AES block 5 - round 8 2833 2834 aese v2.16b, v28.16b 2835 aesmc v2.16b, v2.16b //AES block 2 - round 8 2836 aese v1.16b, v28.16b 2837 aesmc v1.16b, v1.16b //AES block 1 - round 8 2838 aese v6.16b, v28.16b 2839 aesmc v6.16b, v6.16b //AES block 6 - round 8 2840 2841 add x4, x0, x1, lsr #3 //end_input_ptr 2842 cmp x0, x5 //check if we have <= 8 blocks 2843 aese v3.16b, v26.16b 2844 aesmc v3.16b, v3.16b //AES block 3 - round 9 2845 2846 ld1 { v19.16b}, [x3] 2847 ext v19.16b, v19.16b, v19.16b, #8 2848 rev64 v19.16b, v19.16b 2849 ldp q27, q28, [x8, #160] //load rk10, rk11 2850 2851 aese v6.16b, v26.16b 2852 aesmc v6.16b, v6.16b //AES block 6 - round 9 2853 aese v1.16b, v26.16b 2854 aesmc v1.16b, v1.16b //AES block 1 - round 9 2855 2856 aese v5.16b, v26.16b 2857 aesmc v5.16b, v5.16b //AES block 5 - round 9 2858 aese v2.16b, v26.16b 2859 aesmc v2.16b, v2.16b //AES 
block 2 - round 9 2860 2861 aese v0.16b, v26.16b 2862 aesmc v0.16b, v0.16b //AES block 0 - round 9 2863 aese v4.16b, v26.16b 2864 aesmc v4.16b, v4.16b //AES block 4 - round 9 2865 2866 aese v6.16b, v27.16b 2867 aesmc v6.16b, v6.16b //AES block 6 - round 10 2868 aese v7.16b, v26.16b 2869 aesmc v7.16b, v7.16b //AES block 7 - round 9 2870 aese v3.16b, v27.16b 2871 aesmc v3.16b, v3.16b //AES block 3 - round 10 2872 2873 aese v1.16b, v27.16b 2874 aesmc v1.16b, v1.16b //AES block 1 - round 10 2875 aese v5.16b, v27.16b 2876 aesmc v5.16b, v5.16b //AES block 5 - round 10 2877 aese v4.16b, v27.16b 2878 aesmc v4.16b, v4.16b //AES block 4 - round 10 2879 2880 aese v0.16b, v27.16b 2881 aesmc v0.16b, v0.16b //AES block 0 - round 10 2882 aese v2.16b, v27.16b 2883 aesmc v2.16b, v2.16b //AES block 2 - round 10 2884 aese v7.16b, v27.16b 2885 aesmc v7.16b, v7.16b //AES block 7 - round 10 2886 2887 aese v6.16b, v28.16b //AES block 6 - round 11 2888 aese v3.16b, v28.16b //AES block 3 - round 11 2889 2890 aese v4.16b, v28.16b //AES block 4 - round 11 2891 aese v7.16b, v28.16b //AES block 7 - round 11 2892 ldr q26, [x8, #192] //load rk12 2893 2894 aese v1.16b, v28.16b //AES block 1 - round 11 2895 aese v5.16b, v28.16b //AES block 5 - round 11 2896 2897 aese v2.16b, v28.16b //AES block 2 - round 11 2898 aese v0.16b, v28.16b //AES block 0 - round 11 2899 b.ge .L192_enc_tail //handle tail 2900 2901 ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 2902 2903 ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 2904 2905 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 2906 2907 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 2908 2909.inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result 2910 rev32 v0.16b, v30.16b //CTR block 8 2911 add v30.4s, v30.4s, v31.4s //CTR block 8 2912 2913.inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result 2914.inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES
block 1 - result 2915 2916 rev32 v1.16b, v30.16b //CTR block 9 2917 add v30.4s, v30.4s, v31.4s //CTR block 9 2918.inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 2919 2920.inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 2921.inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 2922 stp q8, q9, [x2], #32 //AES block 0, 1 - store result 2923 2924.inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result 2925 rev32 v2.16b, v30.16b //CTR block 10 2926 add v30.4s, v30.4s, v31.4s //CTR block 10 2927 2928 stp q10, q11, [x2], #32 //AES block 2, 3 - store result 2929 cmp x0, x5 //check if we have <= 8 blocks 2930 2931 rev32 v3.16b, v30.16b //CTR block 11 2932 add v30.4s, v30.4s, v31.4s //CTR block 11 2933.inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 2934 2935 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 2936 2937 rev32 v4.16b, v30.16b //CTR block 12 2938 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 2939 add v30.4s, v30.4s, v31.4s //CTR block 12 2940 2941 b.ge .L192_enc_prepretail //do prepretail 2942 2943.L192_enc_main_loop: //main loop start 2944 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 2945 ldp q26, q27, [x8, #0] //load rk0, rk1 2946 rev64 v10.16b, v10.16b //GHASH block 8k+2 2947 2948 rev32 v5.16b, v30.16b //CTR block 8k+13 2949 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 2950 ldr q23, [x3, #176] //load h7l | h7h 2951 ext v23.16b, v23.16b, v23.16b, #8 2952 ldr q25, [x3, #208] //load h8l | h8h 2953 ext v25.16b, v25.16b, v25.16b, #8 2954 2955 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 2956 rev64 v8.16b, v8.16b //GHASH block 8k 2957 ldr q20, [x3, #128] //load h5l | h5h 2958 ext v20.16b, v20.16b, v20.16b, #8 2959 ldr q22, [x3, #160] //load h6l | h6h 2960 ext v22.16b, v22.16b, v22.16b, #8 2961 2962 rev64 v9.16b, v9.16b //GHASH block 8k+1 2963 rev32 v6.16b, v30.16b //CTR block 8k+14 2964 
add v30.4s, v30.4s, v31.4s //CTR block 8k+14 2965 2966 eor v8.16b, v8.16b, v19.16b //PRE 1 2967 rev64 v11.16b, v11.16b //GHASH block 8k+3 2968 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 2969 2970 aese v0.16b, v26.16b 2971 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 2972 rev32 v7.16b, v30.16b //CTR block 8k+15 2973 aese v1.16b, v26.16b 2974 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 2975 2976 aese v3.16b, v26.16b 2977 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 2978 aese v5.16b, v26.16b 2979 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 2980 aese v2.16b, v26.16b 2981 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 2982 2983 aese v7.16b, v26.16b 2984 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 2985 aese v4.16b, v26.16b 2986 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 2987 aese v6.16b, v26.16b 2988 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 2989 2990 ldp q28, q26, [x8, #32] //load rk2, rk3 2991 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 2992 aese v0.16b, v27.16b 2993 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 2994 2995 aese v4.16b, v27.16b 2996 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 2997 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 2998 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 2999 3000 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3001 aese v3.16b, v27.16b 3002 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 3003 ldr q21, [x3, #144] //load h6k | h5k 3004 ldr q24, [x3, #192] //load h8k | h7k 3005 3006 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 3007 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 3008 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3009 3010 aese v1.16b, v27.16b 3011 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 3012 aese v2.16b, v27.16b 3013 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 3014 aese v5.16b, v27.16b 3015 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 3016 3017 eor v17.16b, v17.16b, v16.16b //GHASH 
block 8k+1 - high 3018 aese v6.16b, v27.16b 3019 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 3020 aese v7.16b, v27.16b 3021 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 3022 3023 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 3024 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 3025 aese v1.16b, v28.16b 3026 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 3027 3028 aese v3.16b, v28.16b 3029 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 3030 aese v4.16b, v28.16b 3031 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 3032 aese v6.16b, v28.16b 3033 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 3034 3035 aese v5.16b, v28.16b 3036 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 3037 aese v1.16b, v26.16b 3038 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 3039.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 3040 3041 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 3042 aese v7.16b, v28.16b 3043 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 3044 aese v4.16b, v26.16b 3045 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 3046 3047 aese v2.16b, v28.16b 3048 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 3049 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3050 aese v0.16b, v28.16b 3051 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 3052 3053 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3054 aese v3.16b, v26.16b 3055 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 3056 ldp q27, q28, [x8, #64] //load rk4, rk5 3057 3058 aese v0.16b, v26.16b 3059 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 3060 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 3061 ldr q23, [x3, #80] //load h3l | h3h 3062 ext v23.16b, v23.16b, v23.16b, #8 3063 ldr q25, [x3, #112] //load h4l | h4h 3064 ext v25.16b, v25.16b, v25.16b, #8 3065 3066 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 3067 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 3068 pmull v20.1q, v11.1d, v20.1d //GHASH 
block 8k+3 - low 3069 3070 aese v5.16b, v26.16b 3071 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 3072 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3073 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3074 3075 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 3076 aese v6.16b, v26.16b 3077 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 3078.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 3079 3080 aese v1.16b, v27.16b 3081 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 3082 aese v3.16b, v27.16b 3083 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 3084 aese v7.16b, v26.16b 3085 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 3086 3087 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 3088 aese v6.16b, v27.16b 3089 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 3090 aese v2.16b, v26.16b 3091 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 3092 3093 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 3094 aese v0.16b, v27.16b 3095 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 3096 aese v4.16b, v27.16b 3097 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 3098 3099 aese v2.16b, v27.16b 3100 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 3101 aese v5.16b, v27.16b 3102 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 3103 aese v7.16b, v27.16b 3104 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 3105 3106.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3107 aese v4.16b, v28.16b 3108 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 3109 ldr q20, [x3, #32] //load h1l | h1h 3110 ext v20.16b, v20.16b, v20.16b, #8 3111 ldr q22, [x3, #64] //load h2l | h2h 3112 ext v22.16b, v22.16b, v22.16b, #8 3113 3114 ldp q26, q27, [x8, #96] //load rk6, rk7 3115 aese v2.16b, v28.16b 3116 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 3117 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 3118 3119 rev64 v14.16b, v14.16b //GHASH block 
8k+6 (t0, t1, and t2 free) 3120 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 3121 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 3122 3123 aese v5.16b, v28.16b 3124 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 3125 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3126 3127 aese v6.16b, v28.16b 3128 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 3129 ldr q21, [x3, #48] //load h2k | h1k 3130 ldr q24, [x3, #96] //load h4k | h3k 3131 3132 aese v1.16b, v28.16b 3133 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 3134 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 3135 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3136 3137 aese v3.16b, v28.16b 3138 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 3139 aese v7.16b, v28.16b 3140 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 3141 aese v0.16b, v28.16b 3142 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 3143 3144 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 3145 aese v4.16b, v26.16b 3146 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 3147 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3148 3149 aese v0.16b, v26.16b 3150 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 3151 aese v3.16b, v26.16b 3152 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 3153 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 3154 3155 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 3156 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3157 aese v2.16b, v26.16b 3158 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 3159 3160 aese v6.16b, v26.16b 3161 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 3162 aese v5.16b, v26.16b 3163 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 3164 3165 aese v7.16b, v26.16b 3166 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 3167 aese v2.16b, v27.16b 3168 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 3169 aese v1.16b, v26.16b 3170 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 3171 3172 aese v6.16b, v27.16b 
3173 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 3174 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3175 3176 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 3177 ldp q28, q26, [x8, #128] //load rk8, rk9 3178 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 3179 3180 aese v4.16b, v27.16b 3181 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 3182 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 3183 aese v5.16b, v27.16b 3184 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 3185 3186.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3187 aese v7.16b, v27.16b 3188 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 3189 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 3190 3191 ldr d16, [x10] //MODULO - load modulo constant 3192.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 3193 aese v0.16b, v27.16b 3194 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 3195 3196 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 3197 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 3198 aese v3.16b, v27.16b 3199 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 3200 3201 aese v5.16b, v28.16b 3202 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 3203 aese v4.16b, v28.16b 3204 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 3205 aese v0.16b, v28.16b 3206 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 3207 3208 aese v6.16b, v28.16b 3209 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 3210.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 3211 aese v1.16b, v27.16b 3212 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 3213 3214 aese v7.16b, v28.16b 3215 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 3216 aese v2.16b, v28.16b 3217 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 3218 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 3219 3220 aese v1.16b, v28.16b 3221 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 3222 aese 
v3.16b, v28.16b 3223 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 3224 ldp q27, q28, [x8, #160] //load rk10, rk11 3225 3226.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 3227 rev32 v20.16b, v30.16b //CTR block 8k+16 3228 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 3229 3230 aese v2.16b, v26.16b 3231 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 3232.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3233.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 3234 3235 aese v6.16b, v26.16b 3236 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 3237 aese v3.16b, v26.16b 3238 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 3239 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 3240 3241 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3242 rev32 v22.16b, v30.16b //CTR block 8k+17 3243 aese v0.16b, v26.16b 3244 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 3245 3246 aese v4.16b, v26.16b 3247 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 3248 aese v1.16b, v26.16b 3249 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 3250 aese v7.16b, v26.16b 3251 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 3252 3253.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3254 aese v5.16b, v26.16b 3255 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 3256 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 3257 3258 aese v2.16b, v27.16b 3259 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 3260 aese v4.16b, v27.16b 3261 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 3262 ldr q26, [x8, #192] //load rk12 3263 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3264 3265 aese v0.16b, v27.16b 3266 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 3267 aese v7.16b, v27.16b 3268 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 3269 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 
3270 3271 aese v4.16b, v28.16b //AES block 8k+12 - round 11 3272.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 3273 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext 3274 3275 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext 3276 aese v2.16b, v28.16b //AES block 8k+10 - round 11 3277 aese v1.16b, v27.16b 3278 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 3279 3280 rev32 v23.16b, v30.16b //CTR block 8k+18 3281 aese v5.16b, v27.16b 3282 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 3283 3284 aese v3.16b, v27.16b 3285 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 3286 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 3287 3288 aese v6.16b, v27.16b 3289 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 3290 aese v5.16b, v28.16b //AES block 8k+13 - round 11 3291 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 3292 3293 aese v7.16b, v28.16b //AES block 8k+15 - round 11 3294 aese v0.16b, v28.16b //AES block 8k+8 - round 11 3295.inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 8k+12 - result 3296 3297 aese v6.16b, v28.16b //AES block 8k+14 - round 11 3298 aese v3.16b, v28.16b //AES block 8k+11 - round 11 3299 aese v1.16b, v28.16b //AES block 8k+9 - round 11 3300 3301 rev32 v25.16b, v30.16b //CTR block 8k+19 3302 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 3303.inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 8k+15 - result 3304 3305.inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result 3306.inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result 3307 mov v2.16b, v23.16b //CTR block 8k+18 3308 3309.inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result 3310 mov v1.16b, v22.16b //CTR block 8k+17 3311 stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 3312 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 3313 3314.inst 0xce0669ce //eor3 v14.16b,
v14.16b, v6.16b, v26.16b //AES block 8k+14 - result 3315 mov v0.16b, v20.16b //CTR block 8k+16 3316 rev32 v4.16b, v30.16b //CTR block 8k+20 3317 3318 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 3319.inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 8k+13 - result 3320.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 3321 3322.inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result 3323 mov v3.16b, v25.16b //CTR block 8k+19 3324 3325 stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 3326 3327 stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result 3328 3329 cmp x0, x5 //LOOP CONTROL 3330 stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result 3331 b.lt .L192_enc_main_loop 3332 3333.L192_enc_prepretail: //PREPRETAIL 3334 rev32 v5.16b, v30.16b //CTR block 8k+13 3335 ldp q26, q27, [x8, #0] //load rk0, rk1 3336 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 3337 3338 ldr q23, [x3, #176] //load h7l | h7h 3339 ext v23.16b, v23.16b, v23.16b, #8 3340 ldr q25, [x3, #208] //load h8l | h8h 3341 ext v25.16b, v25.16b, v25.16b, #8 3342 rev64 v8.16b, v8.16b //GHASH block 8k 3343 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 3344 3345 rev32 v6.16b, v30.16b //CTR block 8k+14 3346 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 3347 ldr q21, [x3, #144] //load h6k | h5k 3348 ldr q24, [x3, #192] //load h8k | h7k 3349 3350 rev64 v11.16b, v11.16b //GHASH block 8k+3 3351 rev64 v10.16b, v10.16b //GHASH block 8k+2 3352 ldr q20, [x3, #128] //load h5l | h5h 3353 ext v20.16b, v20.16b, v20.16b, #8 3354 ldr q22, [x3, #160] //load h6l | h6h 3355 ext v22.16b, v22.16b, v22.16b, #8 3356 3357 eor v8.16b, v8.16b, v19.16b //PRE 1 3358 rev32 v7.16b, v30.16b //CTR block 8k+15 3359 rev64 v9.16b, v9.16b //GHASH block 8k+1 3360 3361 aese v5.16b, v26.16b 3362 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 3363 aese v2.16b, v26.16b 3364 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 3365 aese v3.16b, v26.16b
3366 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 3367 3368 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 3369 aese v0.16b, v26.16b 3370 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 3371 aese v6.16b, v26.16b 3372 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 3373 3374 aese v1.16b, v26.16b 3375 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 3376 aese v4.16b, v26.16b 3377 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 3378 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 3379 3380 aese v6.16b, v27.16b 3381 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 3382 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 3383 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3384 3385 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 3386 aese v7.16b, v26.16b 3387 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 3388 ldp q28, q26, [x8, #32] //load rk2, rk3 3389 3390 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 3391 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 3392 aese v2.16b, v27.16b 3393 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 3394 3395 aese v5.16b, v27.16b 3396 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 3397 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 3398 aese v1.16b, v27.16b 3399 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 3400 3401 aese v7.16b, v27.16b 3402 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 3403 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 3404 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 3405 3406 aese v3.16b, v27.16b 3407 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 3408 aese v0.16b, v27.16b 3409 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 3410 aese v4.16b, v27.16b 3411 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 3412 3413 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 3414 aese v5.16b, v28.16b 3415 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 3416 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 3417 3418 pmull v20.1q, 
v11.1d, v20.1d //GHASH block 8k+3 - low 3419 aese v7.16b, v28.16b 3420 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 3421.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 3422 3423 aese v5.16b, v26.16b 3424 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 3425 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3426 aese v6.16b, v28.16b 3427 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 3428 3429 aese v0.16b, v28.16b 3430 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 3431 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 3432 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 3433 3434 aese v3.16b, v28.16b 3435 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 3436 rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free) 3437 rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free) 3438 3439 aese v2.16b, v28.16b 3440 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 3441 aese v1.16b, v28.16b 3442 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 3443 aese v4.16b, v28.16b 3444 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 3445 3446 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3447 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 3448 ldp q27, q28, [x8, #64] //load rk4, rk5 3449 3450 aese v1.16b, v26.16b 3451 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 3452 aese v6.16b, v26.16b 3453 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 3454 aese v2.16b, v26.16b 3455 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 3456 3457 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 3458.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 3459 aese v7.16b, v26.16b 3460 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 3461 3462 ldr q23, [x3, #80] //load h3l | h3h 3463 ext v23.16b, v23.16b, v23.16b, #8 3464 ldr q25, [x3, #112] //load h4l | h4h 3465 ext v25.16b, v25.16b, v25.16b, #8 3466 aese v3.16b, v26.16b 3467 aesmc v3.16b, v3.16b //AES 
block 8k+11 - round 3 3468 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 3469 3470 ldr q20, [x3, #32] //load h1l | h1h 3471 ext v20.16b, v20.16b, v20.16b, #8 3472 ldr q22, [x3, #64] //load h2l | h2h 3473 ext v22.16b, v22.16b, v22.16b, #8 3474 aese v4.16b, v26.16b 3475 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 3476 rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free) 3477 3478 aese v0.16b, v26.16b 3479 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 3480 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 3481 aese v6.16b, v27.16b 3482 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 3483 3484 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3485 aese v7.16b, v27.16b 3486 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 3487 aese v5.16b, v27.16b 3488 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 3489 3490.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 3491 aese v3.16b, v27.16b 3492 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 3493 aese v0.16b, v27.16b 3494 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 3495 3496 aese v1.16b, v27.16b 3497 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 3498 aese v4.16b, v27.16b 3499 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 3500 aese v2.16b, v27.16b 3501 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 3502 3503 aese v0.16b, v28.16b 3504 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 3505 rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free) 3506 ldr q21, [x3, #48] //load h2k | h1k 3507 ldr q24, [x3, #96] //load h4k | h3k 3508 3509 aese v1.16b, v28.16b 3510 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 3511 aese v2.16b, v28.16b 3512 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 3513 ldp q26, q27, [x8, #96] //load rk6, rk7 3514 3515 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 3516 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 3517 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 3518 3519 aese 
v4.16b, v28.16b 3520 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 3521 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 3522 3523 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 3524 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 3525 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 3526 3527 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3528 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3529 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 3530 3531 aese v5.16b, v28.16b 3532 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 3533 aese v1.16b, v26.16b 3534 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 3535 aese v7.16b, v28.16b 3536 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 3537 3538 aese v6.16b, v28.16b 3539 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 3540 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3541 aese v3.16b, v28.16b 3542 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 3543 3544 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 3545 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 3546 3547 aese v4.16b, v26.16b 3548 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 3549 aese v5.16b, v26.16b 3550 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 3551 aese v1.16b, v27.16b 3552 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 3553 3554 aese v0.16b, v26.16b 3555 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 3556 aese v7.16b, v26.16b 3557 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 3558.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 3559 3560 aese v2.16b, v26.16b 3561 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 3562.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 3563 aese v5.16b, v27.16b 3564 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 3565 3566 aese v6.16b, v26.16b 3567 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 3568 ldr d16, [x10] //MODULO - load 
modulo constant 3569 aese v3.16b, v26.16b 3570 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 3571 3572 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 3573 aese v0.16b, v27.16b 3574 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 3575.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 3576 3577 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 3578 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 3579 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 3580 3581 aese v4.16b, v27.16b 3582 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 3583 aese v2.16b, v27.16b 3584 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 3585 ldp q28, q26, [x8, #128] //load rk8, rk9 3586 3587 aese v3.16b, v27.16b 3588 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 3589.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 3590 3591.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 3592.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 3593 3594.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3595 ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3596 aese v7.16b, v27.16b 3597 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 3598 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3599 3600 aese v5.16b, v28.16b 3601 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 3602 aese v1.16b, v28.16b 3603 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 3604 3605 aese v6.16b, v27.16b 3606 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 3607 aese v2.16b, v28.16b 3608 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 3609.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 3610 3611 aese v3.16b, v28.16b 3612 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 3613 aese v5.16b, v26.16b 3614 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 
3615 aese v4.16b, v28.16b 3616 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 3617 3618 aese v0.16b, v28.16b 3619 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 3620 aese v7.16b, v28.16b 3621 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 3622 aese v6.16b, v28.16b 3623 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 3624 3625 aese v3.16b, v26.16b 3626 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 3627 ldp q27, q28, [x8, #160] //load rk10, rk11 3628 aese v4.16b, v26.16b 3629 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 3630 3631 aese v2.16b, v26.16b 3632 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 3633 aese v7.16b, v26.16b 3634 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 3635 3636 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 3637 aese v6.16b, v26.16b 3638 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 3639 aese v0.16b, v26.16b 3640 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 3641 aese v1.16b, v26.16b 3642 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 3643 3644 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 3645 ldr q26, [x8, #192] //load rk12 3646 3647 aese v7.16b, v27.16b 3648 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 3649 aese v1.16b, v27.16b 3650 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 3651 aese v2.16b, v27.16b 3652 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 3653 3654.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 3655 aese v0.16b, v27.16b 3656 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 3657 aese v3.16b, v27.16b 3658 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 3659 3660 aese v1.16b, v28.16b //AES block 8k+9 - round 11 3661 aese v7.16b, v28.16b //AES block 8k+15 - round 11 3662 3663 aese v4.16b, v27.16b 3664 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 3665 aese v3.16b, v28.16b //AES block 8k+11 - round 11 3666 3667 aese v5.16b, v27.16b 3668 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 3669 aese v6.16b, v27.16b 3670 
aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 3671 3672 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 3673 aese v2.16b, v28.16b //AES block 8k+10 - round 11 3674 aese v0.16b, v28.16b //AES block 8k+8 - round 11 3675 3676 aese v6.16b, v28.16b //AES block 8k+14 - round 11 3677 aese v4.16b, v28.16b //AES block 8k+12 - round 11 3678 aese v5.16b, v28.16b //AES block 8k+13 - round 11 3679 3680.L192_enc_tail: //TAIL 3681 3682 ldp q20, q21, [x3, #128] //load h5l | h5h 3683 ext v20.16b, v20.16b, v20.16b, #8 3684 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 3685 3686 ldr q8, [x0], #16 //AES block 8k+8 - l3ad plaintext 3687 3688 ldp q24, q25, [x3, #192] //load h8k | h7k 3689 ext v25.16b, v25.16b, v25.16b, #8 3690 3691 mov v29.16b, v26.16b 3692 3693 ldp q22, q23, [x3, #160] //load h6l | h6h 3694 ext v22.16b, v22.16b, v22.16b, #8 3695 ext v23.16b, v23.16b, v23.16b, #8 3696 cmp x5, #112 3697 3698.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 3699 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 3700 b.gt .L192_enc_blocks_more_than_7 3701 3702 cmp x5, #96 3703 mov v7.16b, v6.16b 3704 movi v17.8b, #0 3705 3706 mov v6.16b, v5.16b 3707 movi v19.8b, #0 3708 sub v30.4s, v30.4s, v31.4s 3709 3710 mov v5.16b, v4.16b 3711 mov v4.16b, v3.16b 3712 mov v3.16b, v2.16b 3713 3714 mov v2.16b, v1.16b 3715 movi v18.8b, #0 3716 b.gt .L192_enc_blocks_more_than_6 3717 3718 mov v7.16b, v6.16b 3719 cmp x5, #80 3720 3721 mov v6.16b, v5.16b 3722 mov v5.16b, v4.16b 3723 mov v4.16b, v3.16b 3724 3725 mov v3.16b, v1.16b 3726 sub v30.4s, v30.4s, v31.4s 3727 b.gt .L192_enc_blocks_more_than_5 3728 3729 cmp x5, #64 3730 sub v30.4s, v30.4s, v31.4s 3731 3732 mov v7.16b, v6.16b 3733 mov v6.16b, v5.16b 3734 mov v5.16b, v4.16b 3735 3736 mov v4.16b, v1.16b 3737 b.gt .L192_enc_blocks_more_than_4 3738 3739 mov v7.16b, v6.16b 3740 mov v6.16b, v5.16b 3741 mov v5.16b, v1.16b 3742 3743 sub v30.4s, v30.4s, v31.4s 3744 cmp x5, #48 3745 b.gt 
.L192_enc_blocks_more_than_3 3746 3747 mov v7.16b, v6.16b 3748 mov v6.16b, v1.16b 3749 sub v30.4s, v30.4s, v31.4s 3750 3751 ldr q24, [x3, #96] //load h4k | h3k 3752 cmp x5, #32 3753 b.gt .L192_enc_blocks_more_than_2 3754 3755 sub v30.4s, v30.4s, v31.4s 3756 3757 cmp x5, #16 3758 mov v7.16b, v1.16b 3759 b.gt .L192_enc_blocks_more_than_1 3760 3761 sub v30.4s, v30.4s, v31.4s 3762 ldr q21, [x3, #48] //load h2k | h1k 3763 b .L192_enc_blocks_less_than_1 3764.L192_enc_blocks_more_than_7: //blocks left > 7 3765 st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 3766 3767 rev64 v8.16b, v9.16b //GHASH final-7 block 3768 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 3769 3770 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3771 3772 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 3773 3774 ldr q9, [x0], #16 //AES final-6 block - load plaintext 3775 3776 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 3777 movi v16.8b, #0 //suppress further partial tag feed in 3778 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 3779 3780 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 3781 3782 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 3783.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 3784.L192_enc_blocks_more_than_6: //blocks left > 6 3785 3786 st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 3787 3788 rev64 v8.16b, v9.16b //GHASH final-6 block 3789 3790 ldr q9, [x0], #16 //AES final-5 block - load plaintext 3791 3792 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3793 3794 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 3795 3796 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 3797.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 3798 3799 movi v16.8b, #0 //suppress further partial tag feed in 3800 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 3801 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 3802 3803 pmull v27.1q, 
v27.1d, v24.1d //GHASH final-6 block - mid 3804 3805 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 3806 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 3807 3808 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 3809.L192_enc_blocks_more_than_5: //blocks left > 5 3810 3811 st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 3812 3813 rev64 v8.16b, v9.16b //GHASH final-5 block 3814 3815 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3816 3817 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 3818 3819 ldr q9, [x0], #16 //AES final-4 block - load plaintext 3820 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 3821 3822 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 3823 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 3824 3825 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 3826 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 3827 3828 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 3829 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 3830 3831.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 3832 movi v16.8b, #0 //suppress further partial tag feed in 3833 3834 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 3835.L192_enc_blocks_more_than_4: //blocks left > 4 3836 3837 st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 3838 3839 rev64 v8.16b, v9.16b //GHASH final-4 block 3840 3841 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3842 3843 ldr q9, [x0], #16 //AES final-3 block - load plaintext 3844 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 3845 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 3846 3847 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 3848 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 3849 3850 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 3851 3852 movi v16.8b, #0 //suppress further partial tag feed in 3853 eor v19.16b, v19.16b, v26.16b //GHASH final-4 
block - low 3854 3855 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 3856 3857 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 3858.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 3859.L192_enc_blocks_more_than_3: //blocks left > 3 3860 3861 ldr q24, [x3, #96] //load h4k | h3k 3862 st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 3863 3864 rev64 v8.16b, v9.16b //GHASH final-3 block 3865 3866 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3867 movi v16.8b, #0 //suppress further partial tag feed in 3868 3869 ldr q9, [x0], #16 //AES final-2 block - load plaintext 3870 ldr q25, [x3, #112] //load h4l | h4h 3871 ext v25.16b, v25.16b, v25.16b, #8 3872 3873 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 3874 3875.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 3876 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 3877 3878 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 3879 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 3880 3881 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 3882 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 3883 3884 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 3885 3886 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 3887 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 3888.L192_enc_blocks_more_than_2: //blocks left > 2 3889 3890 st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 3891 3892 rev64 v8.16b, v9.16b //GHASH final-2 block 3893 ldr q23, [x3, #80] //load h3l | h3h 3894 ext v23.16b, v23.16b, v23.16b, #8 3895 3896 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3897 3898 ldr q9, [x0], #16 //AES final-1 block - load plaintext 3899 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 3900 3901 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 3902 3903 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 3904 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 
block - high 3905 movi v16.8b, #0 //suppress further partial tag feed in 3906 3907 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 3908 3909 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 3910 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 3911 3912 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 3913.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 3914.L192_enc_blocks_more_than_1: //blocks left > 1 3915 3916 ldr q22, [x3, #64] //load h1l | h1h 3917 ext v22.16b, v22.16b, v22.16b, #8 3918 st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 3919 3920 rev64 v8.16b, v9.16b //GHASH final-1 block 3921 3922 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3923 3924 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 3925 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 3926 3927 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 3928 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 3929 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 3930 3931 ldr q9, [x0], #16 //AES final block - load plaintext 3932 ldr q21, [x3, #48] //load h2k | h1k 3933 3934 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 3935 3936.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 3937 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 3938 3939 movi v16.8b, #0 //suppress further partial tag feed in 3940 3941 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 3942 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 3943.L192_enc_blocks_less_than_1: //blocks left <= 1 3944 3945 mvn x6, xzr //temp0_x = 0xffffffffffffffff 3946 and x1, x1, #127 //bit_length %= 128 3947 3948 sub x1, x1, #128 //bit_length -= 128 3949 3950 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 3951 3952 and x1, x1, #127 //bit_length %= 128 3953 3954 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 3955 cmp x1, #64 3956 mvn x7, xzr 
//temp1_x = 0xffffffffffffffff 3957 3958 csel x13, x7, x6, lt 3959 csel x14, x6, xzr, lt 3960 3961 mov v0.d[1], x14 3962 ldr q20, [x3, #32] //load h1l | h1h 3963 ext v20.16b, v20.16b, v20.16b, #8 3964 3965 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 3966 mov v0.d[0], x13 //ctr0b is mask for last block 3967 3968 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 3969 3970 rev64 v8.16b, v9.16b //GHASH final block 3971 bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 3972 3973 st1 { v9.16b}, [x2] //store all 16B 3974 3975 eor v8.16b, v8.16b, v16.16b //feed in partial tag 3976 3977 ins v16.d[0], v8.d[1] //GHASH final block - mid 3978 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 3979 3980 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 3981 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 3982 3983 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 3984 3985 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 3986 3987 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 3988 ldr d16, [x10] //MODULO - load modulo constant 3989 3990 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 3991 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 3992 3993 rev32 v30.16b, v30.16b 3994 3995 str q30, [x16] //store the updated counter 3996.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 3997 3998 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 3999 4000.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 4001 4002 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 4003 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 4004 4005.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 4006 ext v19.16b, v19.16b, v19.16b, #8 4007 rev64 v19.16b, v19.16b 4008 st1 { v19.16b }, [x3] 
4009 4010 mov x0, x9 //return sizes 4011 4012 ldp d10, d11, [sp, #16] 4013 ldp d12, d13, [sp, #32] 4014 ldp d14, d15, [sp, #48] 4015 ldp d8, d9, [sp], #80 4016 ret 4017 4018.L192_enc_ret: 4019 mov w0, #0x0 4020 ret 4021.size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel 4022.globl unroll8_eor3_aes_gcm_dec_192_kernel 4023.type unroll8_eor3_aes_gcm_dec_192_kernel,%function 4024.align 4 4025unroll8_eor3_aes_gcm_dec_192_kernel: 4026 AARCH64_VALID_CALL_TARGET 4027 cbz x1, .L192_dec_ret 4028 stp d8, d9, [sp, #-80]! 4029 lsr x9, x1, #3 4030 mov x16, x4 4031 mov x8, x5 4032 stp d10, d11, [sp, #16] 4033 stp d12, d13, [sp, #32] 4034 stp d14, d15, [sp, #48] 4035 mov x5, #0xc200000000000000 4036 stp x5, xzr, [sp, #64] 4037 add x10, sp, #64 4038 4039 mov x5, x9 4040 ld1 { v0.16b}, [x16] //CTR block 0 4041 ld1 { v19.16b}, [x3] 4042 4043 mov x15, #0x100000000 //set up counter increment 4044 movi v31.16b, #0x0 4045 mov v31.d[1], x15 4046 4047 rev32 v30.16b, v0.16b //set up reversed counter 4048 4049 add v30.4s, v30.4s, v31.4s //CTR block 0 4050 4051 rev32 v1.16b, v30.16b //CTR block 1 4052 add v30.4s, v30.4s, v31.4s //CTR block 1 4053 4054 rev32 v2.16b, v30.16b //CTR block 2 4055 add v30.4s, v30.4s, v31.4s //CTR block 2 4056 4057 rev32 v3.16b, v30.16b //CTR block 3 4058 add v30.4s, v30.4s, v31.4s //CTR block 3 4059 4060 rev32 v4.16b, v30.16b //CTR block 4 4061 add v30.4s, v30.4s, v31.4s //CTR block 4 4062 4063 rev32 v5.16b, v30.16b //CTR block 5 4064 add v30.4s, v30.4s, v31.4s //CTR block 5 4065 ldp q26, q27, [x8, #0] //load rk0, rk1 4066 4067 rev32 v6.16b, v30.16b //CTR block 6 4068 add v30.4s, v30.4s, v31.4s //CTR block 6 4069 4070 rev32 v7.16b, v30.16b //CTR block 7 4071 4072 aese v3.16b, v26.16b 4073 aesmc v3.16b, v3.16b //AES block 3 - round 0 4074 aese v6.16b, v26.16b 4075 aesmc v6.16b, v6.16b //AES block 6 - round 0 4076 aese v5.16b, v26.16b 4077 aesmc v5.16b, v5.16b //AES block 5 - round 0 4078 4079 aese v0.16b, v26.16b 4080 aesmc v0.16b, v0.16b 
//AES block 0 - round 0 4081 aese v1.16b, v26.16b 4082 aesmc v1.16b, v1.16b //AES block 1 - round 0 4083 aese v7.16b, v26.16b 4084 aesmc v7.16b, v7.16b //AES block 7 - round 0 4085 4086 aese v2.16b, v26.16b 4087 aesmc v2.16b, v2.16b //AES block 2 - round 0 4088 aese v4.16b, v26.16b 4089 aesmc v4.16b, v4.16b //AES block 4 - round 0 4090 ldp q28, q26, [x8, #32] //load rk2, rk3 4091 4092 aese v1.16b, v27.16b 4093 aesmc v1.16b, v1.16b //AES block 1 - round 1 4094 4095 aese v2.16b, v27.16b 4096 aesmc v2.16b, v2.16b //AES block 2 - round 1 4097 4098 aese v0.16b, v27.16b 4099 aesmc v0.16b, v0.16b //AES block 0 - round 1 4100 aese v3.16b, v27.16b 4101 aesmc v3.16b, v3.16b //AES block 3 - round 1 4102 aese v7.16b, v27.16b 4103 aesmc v7.16b, v7.16b //AES block 7 - round 1 4104 4105 aese v5.16b, v27.16b 4106 aesmc v5.16b, v5.16b //AES block 5 - round 1 4107 aese v6.16b, v27.16b 4108 aesmc v6.16b, v6.16b //AES block 6 - round 1 4109 4110 aese v7.16b, v28.16b 4111 aesmc v7.16b, v7.16b //AES block 7 - round 2 4112 aese v0.16b, v28.16b 4113 aesmc v0.16b, v0.16b //AES block 0 - round 2 4114 aese v4.16b, v27.16b 4115 aesmc v4.16b, v4.16b //AES block 4 - round 1 4116 4117 aese v5.16b, v28.16b 4118 aesmc v5.16b, v5.16b //AES block 5 - round 2 4119 aese v1.16b, v28.16b 4120 aesmc v1.16b, v1.16b //AES block 1 - round 2 4121 aese v2.16b, v28.16b 4122 aesmc v2.16b, v2.16b //AES block 2 - round 2 4123 4124 aese v3.16b, v28.16b 4125 aesmc v3.16b, v3.16b //AES block 3 - round 2 4126 aese v4.16b, v28.16b 4127 aesmc v4.16b, v4.16b //AES block 4 - round 2 4128 aese v6.16b, v28.16b 4129 aesmc v6.16b, v6.16b //AES block 6 - round 2 4130 4131 aese v7.16b, v26.16b 4132 aesmc v7.16b, v7.16b //AES block 7 - round 3 4133 4134 ldp q27, q28, [x8, #64] //load rk4, rk5 4135 aese v2.16b, v26.16b 4136 aesmc v2.16b, v2.16b //AES block 2 - round 3 4137 aese v5.16b, v26.16b 4138 aesmc v5.16b, v5.16b //AES block 5 - round 3 4139 4140 aese v0.16b, v26.16b 4141 aesmc v0.16b, v0.16b //AES block 0 - round 3 4142 
aese v3.16b, v26.16b 4143 aesmc v3.16b, v3.16b //AES block 3 - round 3 4144 4145 aese v4.16b, v26.16b 4146 aesmc v4.16b, v4.16b //AES block 4 - round 3 4147 aese v1.16b, v26.16b 4148 aesmc v1.16b, v1.16b //AES block 1 - round 3 4149 aese v6.16b, v26.16b 4150 aesmc v6.16b, v6.16b //AES block 6 - round 3 4151 4152 aese v3.16b, v27.16b 4153 aesmc v3.16b, v3.16b //AES block 3 - round 4 4154 aese v2.16b, v27.16b 4155 aesmc v2.16b, v2.16b //AES block 2 - round 4 4156 aese v5.16b, v27.16b 4157 aesmc v5.16b, v5.16b //AES block 5 - round 4 4158 4159 aese v1.16b, v27.16b 4160 aesmc v1.16b, v1.16b //AES block 1 - round 4 4161 aese v7.16b, v27.16b 4162 aesmc v7.16b, v7.16b //AES block 7 - round 4 4163 aese v6.16b, v27.16b 4164 aesmc v6.16b, v6.16b //AES block 6 - round 4 4165 4166 aese v0.16b, v27.16b 4167 aesmc v0.16b, v0.16b //AES block 0 - round 4 4168 aese v5.16b, v28.16b 4169 aesmc v5.16b, v5.16b //AES block 5 - round 5 4170 aese v4.16b, v27.16b 4171 aesmc v4.16b, v4.16b //AES block 4 - round 4 4172 4173 aese v6.16b, v28.16b 4174 aesmc v6.16b, v6.16b //AES block 6 - round 5 4175 ldp q26, q27, [x8, #96] //load rk6, rk7 4176 4177 aese v0.16b, v28.16b 4178 aesmc v0.16b, v0.16b //AES block 0 - round 5 4179 aese v4.16b, v28.16b 4180 aesmc v4.16b, v4.16b //AES block 4 - round 5 4181 aese v1.16b, v28.16b 4182 aesmc v1.16b, v1.16b //AES block 1 - round 5 4183 4184 aese v3.16b, v28.16b 4185 aesmc v3.16b, v3.16b //AES block 3 - round 5 4186 aese v2.16b, v28.16b 4187 aesmc v2.16b, v2.16b //AES block 2 - round 5 4188 aese v7.16b, v28.16b 4189 aesmc v7.16b, v7.16b //AES block 7 - round 5 4190 4191 sub x5, x5, #1 //byte_len - 1 4192 4193 aese v4.16b, v26.16b 4194 aesmc v4.16b, v4.16b //AES block 4 - round 6 4195 aese v5.16b, v26.16b 4196 aesmc v5.16b, v5.16b //AES block 5 - round 6 4197 aese v1.16b, v26.16b 4198 aesmc v1.16b, v1.16b //AES block 1 - round 6 4199 4200 aese v0.16b, v26.16b 4201 aesmc v0.16b, v0.16b //AES block 0 - round 6 4202 aese v3.16b, v26.16b 4203 aesmc v3.16b, 
v3.16b //AES block 3 - round 6 4204 aese v6.16b, v26.16b 4205 aesmc v6.16b, v6.16b //AES block 6 - round 6 4206 4207 aese v7.16b, v26.16b 4208 aesmc v7.16b, v7.16b //AES block 7 - round 6 4209 aese v2.16b, v26.16b 4210 aesmc v2.16b, v2.16b //AES block 2 - round 6 4211 ldp q28, q26, [x8, #128] //load rk8, rk9 4212 4213 add v30.4s, v30.4s, v31.4s //CTR block 7 4214 4215 aese v3.16b, v27.16b 4216 aesmc v3.16b, v3.16b //AES block 3 - round 7 4217 aese v7.16b, v27.16b 4218 aesmc v7.16b, v7.16b //AES block 7 - round 7 4219 4220 aese v2.16b, v27.16b 4221 aesmc v2.16b, v2.16b //AES block 2 - round 7 4222 aese v1.16b, v27.16b 4223 aesmc v1.16b, v1.16b //AES block 1 - round 7 4224 aese v4.16b, v27.16b 4225 aesmc v4.16b, v4.16b //AES block 4 - round 7 4226 4227 aese v6.16b, v27.16b 4228 aesmc v6.16b, v6.16b //AES block 6 - round 7 4229 aese v0.16b, v27.16b 4230 aesmc v0.16b, v0.16b //AES block 0 - round 7 4231 aese v5.16b, v27.16b 4232 aesmc v5.16b, v5.16b //AES block 5 - round 7 4233 4234 aese v1.16b, v28.16b 4235 aesmc v1.16b, v1.16b //AES block 1 - round 8 4236 aese v2.16b, v28.16b 4237 aesmc v2.16b, v2.16b //AES block 2 - round 8 4238 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 4239 4240 aese v7.16b, v28.16b 4241 aesmc v7.16b, v7.16b //AES block 7 - round 8 4242 aese v6.16b, v28.16b 4243 aesmc v6.16b, v6.16b //AES block 6 - round 8 4244 aese v5.16b, v28.16b 4245 aesmc v5.16b, v5.16b //AES block 5 - round 8 4246 4247 aese v4.16b, v28.16b 4248 aesmc v4.16b, v4.16b //AES block 4 - round 8 4249 aese v3.16b, v28.16b 4250 aesmc v3.16b, v3.16b //AES block 3 - round 8 4251 aese v0.16b, v28.16b 4252 aesmc v0.16b, v0.16b //AES block 0 - round 8 4253 4254 add x4, x0, x1, lsr #3 //end_input_ptr 4255 aese v6.16b, v26.16b 4256 aesmc v6.16b, v6.16b //AES block 6 - round 9 4257 4258 ld1 { v19.16b}, [x3] 4259 ext v19.16b, v19.16b, v19.16b, #8 4260 rev64 v19.16b, v19.16b 4261 4262 ldp q27, q28, [x8, #160] //load 
rk10, rk11 4263 4264 aese v0.16b, v26.16b 4265 aesmc v0.16b, v0.16b //AES block 0 - round 9 4266 add x5, x5, x0 4267 4268 aese v1.16b, v26.16b 4269 aesmc v1.16b, v1.16b //AES block 1 - round 9 4270 aese v7.16b, v26.16b 4271 aesmc v7.16b, v7.16b //AES block 7 - round 9 4272 aese v4.16b, v26.16b 4273 aesmc v4.16b, v4.16b //AES block 4 - round 9 4274 4275 cmp x0, x5 //check if we have <= 8 blocks 4276 aese v3.16b, v26.16b 4277 aesmc v3.16b, v3.16b //AES block 3 - round 9 4278 4279 aese v5.16b, v26.16b 4280 aesmc v5.16b, v5.16b //AES block 5 - round 9 4281 aese v2.16b, v26.16b 4282 aesmc v2.16b, v2.16b //AES block 2 - round 9 4283 4284 aese v3.16b, v27.16b 4285 aesmc v3.16b, v3.16b //AES block 3 - round 10 4286 aese v1.16b, v27.16b 4287 aesmc v1.16b, v1.16b //AES block 1 - round 10 4288 aese v7.16b, v27.16b 4289 aesmc v7.16b, v7.16b //AES block 7 - round 10 4290 4291 aese v4.16b, v27.16b 4292 aesmc v4.16b, v4.16b //AES block 4 - round 10 4293 aese v0.16b, v27.16b 4294 aesmc v0.16b, v0.16b //AES block 0 - round 10 4295 aese v2.16b, v27.16b 4296 aesmc v2.16b, v2.16b //AES block 2 - round 10 4297 4298 aese v6.16b, v27.16b 4299 aesmc v6.16b, v6.16b //AES block 6 - round 10 4300 aese v5.16b, v27.16b 4301 aesmc v5.16b, v5.16b //AES block 5 - round 10 4302 ldr q26, [x8, #192] //load rk12 4303 4304 aese v0.16b, v28.16b //AES block 0 - round 11 4305 aese v1.16b, v28.16b //AES block 1 - round 11 4306 aese v4.16b, v28.16b //AES block 4 - round 11 4307 4308 aese v6.16b, v28.16b //AES block 6 - round 11 4309 aese v5.16b, v28.16b //AES block 5 - round 11 4310 aese v7.16b, v28.16b //AES block 7 - round 11 4311 4312 aese v2.16b, v28.16b //AES block 2 - round 11 4313 aese v3.16b, v28.16b //AES block 3 - round 11 4314 b.ge .L192_dec_tail //handle tail 4315 4316 ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 4317 4318 ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 4319 4320 ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 4321 4322.inst 0xce016921 //eor3 
v1.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result 4323.inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result 4324 stp q0, q1, [x2], #32 //AES block 0, 1 - store result 4325 4326 rev32 v0.16b, v30.16b //CTR block 8 4327 add v30.4s, v30.4s, v31.4s //CTR block 8 4328 4329 rev32 v1.16b, v30.16b //CTR block 9 4330 add v30.4s, v30.4s, v31.4s //CTR block 9 4331.inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result 4332 4333.inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result 4334 stp q2, q3, [x2], #32 //AES block 2, 3 - store result 4335 ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 4336 4337 rev32 v2.16b, v30.16b //CTR block 10 4338 add v30.4s, v30.4s, v31.4s //CTR block 10 4339 4340.inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result 4341 4342 rev32 v3.16b, v30.16b //CTR block 11 4343 add v30.4s, v30.4s, v31.4s //CTR block 11 4344 4345.inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result 4346 stp q4, q5, [x2], #32 //AES block 4, 5 - store result 4347 cmp x0, x5 //check if we have <= 8 blocks 4348 4349.inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result 4350.inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result 4351 rev32 v4.16b, v30.16b //CTR block 12 4352 4353 add v30.4s, v30.4s, v31.4s //CTR block 12 4354 stp q6, q7, [x2], #32 //AES block 6, 7 - store result 4355 b.ge .L192_dec_prepretail //do prepretail 4356 4357.L192_dec_main_loop: //main loop start 4358 rev64 v9.16b, v9.16b //GHASH block 8k+1 4359 ldp q26, q27, [x8, #0] //load rk0, rk1 4360 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 4361 4362 rev64 v8.16b, v8.16b //GHASH block 8k 4363 rev32 v5.16b, v30.16b //CTR block 8k+13 4364 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 4365 4366 ldr q23, [x3, #176] //load h7l | h7h 4367 ext v23.16b, v23.16b, v23.16b, #8 4368 ldr q25, [x3, #208] //load h8l | h8h 4369 ext v25.16b, 
v25.16b, v25.16b, #8 4370 rev64 v12.16b, v12.16b //GHASH block 8k+4 4371 rev64 v11.16b, v11.16b //GHASH block 8k+3 4372 4373 eor v8.16b, v8.16b, v19.16b //PRE 1 4374 rev32 v6.16b, v30.16b //CTR block 8k+14 4375 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 4376 4377 rev64 v13.16b, v13.16b //GHASH block 8k+5 4378 4379 rev32 v7.16b, v30.16b //CTR block 8k+15 4380 aese v1.16b, v26.16b 4381 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 4382 aese v6.16b, v26.16b 4383 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 4384 4385 aese v5.16b, v26.16b 4386 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 4387 aese v4.16b, v26.16b 4388 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 4389 aese v0.16b, v26.16b 4390 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 4391 4392 aese v7.16b, v26.16b 4393 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 4394 aese v2.16b, v26.16b 4395 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 4396 aese v3.16b, v26.16b 4397 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 4398 4399 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 4400 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 4401 ldp q28, q26, [x8, #32] //load rk2, rk3 4402 4403 aese v6.16b, v27.16b 4404 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 4405 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 4406 ldr q20, [x3, #128] //load h5l | h5h 4407 ext v20.16b, v20.16b, v20.16b, #8 4408 ldr q22, [x3, #160] //load h6l | h6h 4409 ext v22.16b, v22.16b, v22.16b, #8 4410 4411 aese v0.16b, v27.16b 4412 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 4413 aese v3.16b, v27.16b 4414 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 4415 aese v7.16b, v27.16b 4416 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 4417 4418 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 4419 aese v2.16b, v27.16b 4420 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 4421 aese v4.16b, v27.16b 4422 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 4423 4424 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 
8k+1 - mid 4425 rev64 v10.16b, v10.16b //GHASH block 8k+2 4426 aese v1.16b, v27.16b 4427 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 4428 4429 aese v5.16b, v27.16b 4430 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 4431 ldr q21, [x3, #144] //load h6k | h5k 4432 ldr q24, [x3, #192] //load h8k | h7k 4433 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4434 4435 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 4436 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 4437 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 4438 4439 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 4440 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 4441 aese v6.16b, v28.16b 4442 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 4443 4444 aese v2.16b, v28.16b 4445 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 4446 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 4447.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 4448 4449 aese v1.16b, v28.16b 4450 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 4451 aese v6.16b, v26.16b 4452 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 4453 aese v4.16b, v28.16b 4454 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 4455 4456 aese v0.16b, v28.16b 4457 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 4458 aese v7.16b, v28.16b 4459 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 4460 aese v3.16b, v28.16b 4461 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 4462 4463 ldr q23, [x3, #80] //load h3l | h3h 4464 ext v23.16b, v23.16b, v23.16b, #8 4465 ldr q25, [x3, #112] //load h4l | h4h 4466 ext v25.16b, v25.16b, v25.16b, #8 4467 aese v5.16b, v28.16b 4468 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 4469 aese v2.16b, v26.16b 4470 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 4471 4472 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 4473 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4474 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - 
mid 4475 4476 aese v3.16b, v26.16b 4477 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 4478 aese v4.16b, v26.16b 4479 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 4480 4481 aese v0.16b, v26.16b 4482 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 4483 aese v7.16b, v26.16b 4484 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 4485 ldp q27, q28, [x8, #64] //load rk4, rk5 4486 4487 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4488.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 4489 aese v1.16b, v26.16b 4490 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 4491 4492 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4493 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 4494 4495 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 4496 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 4497 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 4498 4499 aese v5.16b, v26.16b 4500 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 4501 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 4502 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 4503 4504 aese v4.16b, v27.16b 4505 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 4506 aese v6.16b, v27.16b 4507 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 4508 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 4509 4510 aese v5.16b, v27.16b 4511 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 4512 aese v1.16b, v27.16b 4513 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 4514 aese v3.16b, v27.16b 4515 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 4516 4517 aese v2.16b, v27.16b 4518 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 4519 aese v0.16b, v27.16b 4520 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 4521 aese v7.16b, v27.16b 4522 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 4523 4524 ldr q20, [x3, #32] //load h1l | h1h 4525 ext v20.16b, v20.16b, v20.16b, #8 4526 ldr q22, [x3, #64] //load h2l | h2h 4527 ext v22.16b, v22.16b, v22.16b, 
#8 4528 aese v3.16b, v28.16b 4529 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 4530 aese v5.16b, v28.16b 4531 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 4532 4533 ldp q26, q27, [x8, #96] //load rk6, rk7 4534 aese v7.16b, v28.16b 4535 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 4536 rev64 v15.16b, v15.16b //GHASH block 8k+7 4537 4538 aese v4.16b, v28.16b 4539 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 4540.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4541 aese v1.16b, v28.16b 4542 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 4543 4544 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 4545 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4546 aese v2.16b, v28.16b 4547 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 4548 4549 aese v6.16b, v28.16b 4550 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 4551 aese v0.16b, v28.16b 4552 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 4553 rev64 v14.16b, v14.16b //GHASH block 8k+6 4554 4555 ldr q21, [x3, #48] //load h2k | h1k 4556 ldr q24, [x3, #96] //load h4k | h3k 4557 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 4558 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 4559 4560 aese v0.16b, v26.16b 4561 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 4562 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4563 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4564 4565 aese v7.16b, v26.16b 4566 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 4567 aese v2.16b, v26.16b 4568 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 4569 aese v6.16b, v26.16b 4570 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 4571 4572 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 4573 aese v3.16b, v26.16b 4574 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 4575 aese v1.16b, v26.16b 4576 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 4577 4578 aese v2.16b, v27.16b 4579 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 4580 
aese v6.16b, v27.16b 4581 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 4582 aese v5.16b, v26.16b 4583 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 4584 4585 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 4586.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 4587.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 4588 4589 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 4590 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4591 aese v4.16b, v26.16b 4592 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 4593 4594 aese v5.16b, v27.16b 4595 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 4596 ldp q28, q26, [x8, #128] //load rk8, rk9 4597 aese v3.16b, v27.16b 4598 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 4599 4600 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4601 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 4602 aese v1.16b, v27.16b 4603 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 4604 4605 aese v4.16b, v27.16b 4606 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 4607 aese v0.16b, v27.16b 4608 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 4609 aese v7.16b, v27.16b 4610 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 4611 4612.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4613 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 4614 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 4615 4616 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 4617 ldr d16, [x10] //MODULO - load modulo constant 4618 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 4619 4620 aese v2.16b, v28.16b 4621 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 4622 aese v5.16b, v28.16b 4623 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 4624 aese v7.16b, v28.16b 4625 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 4626 4627 aese v0.16b, v28.16b 4628 aesmc v0.16b, v0.16b //AES block 8k+8 - 
round 8 4629 aese v3.16b, v28.16b 4630 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 4631.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 4632 4633 aese v4.16b, v28.16b 4634 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 4635 aese v1.16b, v28.16b 4636 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 4637 aese v6.16b, v28.16b 4638 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 4639 4640.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 4641 rev32 v20.16b, v30.16b //CTR block 8k+16 4642 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 4643 4644 aese v5.16b, v26.16b 4645 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 4646.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4647 aese v1.16b, v26.16b 4648 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 4649 4650 aese v3.16b, v26.16b 4651 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 4652 aese v7.16b, v26.16b 4653 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 4654 ldp q27, q28, [x8, #160] //load rk10, rk11 4655 4656.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 4657 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 4658 4659 aese v2.16b, v26.16b 4660 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 4661 aese v0.16b, v26.16b 4662 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 4663 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 4664 4665 rev32 v22.16b, v30.16b //CTR block 8k+17 4666 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 4667 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 4668 4669 aese v6.16b, v26.16b 4670 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 4671 aese v4.16b, v26.16b 4672 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 4673 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 4674 4675 aese v3.16b, v27.16b 4676 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 4677 aese 
v7.16b, v27.16b 4678 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 4679 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 4680 4681 rev32 v23.16b, v30.16b //CTR block 8k+18 4682 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 4683.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 4684 4685 aese v0.16b, v27.16b 4686 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 4687 aese v1.16b, v27.16b 4688 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 4689 ldr q26, [x8, #192] //load rk12 4690 4691 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 4692 aese v4.16b, v27.16b 4693 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 4694 aese v6.16b, v27.16b 4695 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 4696 4697 aese v0.16b, v28.16b //AES block 8k+8 - round 11 4698 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 4699 aese v1.16b, v28.16b //AES block 8k+9 - round 11 4700 4701 aese v2.16b, v27.16b 4702 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 4703 aese v6.16b, v28.16b //AES block 8k+14 - round 11 4704 aese v3.16b, v28.16b //AES block 8k+11 - round 11 4705 4706.inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result 4707 rev32 v25.16b, v30.16b //CTR block 8k+19 4708 aese v5.16b, v27.16b 4709 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 4710 4711 aese v4.16b, v28.16b //AES block 8k+12 - round 11 4712 aese v2.16b, v28.16b //AES block 8k+10 - round 11 4713 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 4714 4715 aese v7.16b, v28.16b //AES block 8k+15 - round 11 4716 aese v5.16b, v28.16b //AES block 8k+13 - round 11 4717 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 4718 4719.inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result 4720 stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 4721.inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result 4722 4723.inst 0xce026942 //eor3 
v2.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result 4724.inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 8k+15 - result 4725 stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 4726 4727.inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 8k+13 - result 4728.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 4729 mov v3.16b, v25.16b //CTR block 8k+19 4730 4731.inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 8k+12 - result 4732 stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 4733 cmp x0, x5 //.LOOP CONTROL 4734 4735.inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 8k+14 - result 4736 stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result 4737 mov v0.16b, v20.16b //CTR block 8k+16 4738 4739 mov v1.16b, v22.16b //CTR block 8k+17 4740 mov v2.16b, v23.16b //CTR block 8k+18 4741 4742 rev32 v4.16b, v30.16b //CTR block 8k+20 4743 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 4744 b.lt .L192_dec_main_loop 4745 4746.L192_dec_prepretail: //PREPRETAIL 4747 ldp q26, q27, [x8, #0] //load rk0, rk1 4748 rev32 v5.16b, v30.16b //CTR block 8k+13 4749 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 4750 4751 ldr q23, [x3, #176] //load h7l | h7h 4752 ext v23.16b, v23.16b, v23.16b, #8 4753 ldr q25, [x3, #208] //load h8l | h8h 4754 ext v25.16b, v25.16b, v25.16b, #8 4755 rev64 v8.16b, v8.16b //GHASH block 8k 4756 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 4757 4758 rev64 v11.16b, v11.16b //GHASH block 8k+3 4759 rev32 v6.16b, v30.16b //CTR block 8k+14 4760 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 4761 4762 eor v8.16b, v8.16b, v19.16b //PRE 1 4763 rev64 v10.16b, v10.16b //GHASH block 8k+2 4764 rev64 v9.16b, v9.16b //GHASH block 8k+1 4765 4766 ldr q20, [x3, #128] //load h5l | h5h 4767 ext v20.16b, v20.16b, v20.16b, #8 4768 ldr q22, [x3, #160] //load h6l | h6h 4769 ext v22.16b, v22.16b, v22.16b, #8 4770 rev32 v7.16b, v30.16b //CTR block 8k+15 4771 
4772 aese v0.16b, v26.16b 4773 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 4774 aese v6.16b, v26.16b 4775 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 4776 aese v5.16b, v26.16b 4777 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 4778 4779 aese v3.16b, v26.16b 4780 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 4781 aese v2.16b, v26.16b 4782 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 4783 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 4784 4785 aese v4.16b, v26.16b 4786 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 4787 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 4788 aese v1.16b, v26.16b 4789 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 4790 4791 aese v6.16b, v27.16b 4792 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 4793 aese v7.16b, v26.16b 4794 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 4795 ldp q28, q26, [x8, #32] //load rk2, rk3 4796 4797 aese v4.16b, v27.16b 4798 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 4799 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 4800 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 4801 4802 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 4803 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 4804 aese v3.16b, v27.16b 4805 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 4806 4807 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 4808 aese v7.16b, v27.16b 4809 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 4810 aese v0.16b, v27.16b 4811 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 4812 4813 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4814 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 4815 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 4816 4817 aese v2.16b, v27.16b 4818 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 4819 aese v1.16b, v27.16b 4820 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 4821 aese v5.16b, v27.16b 4822 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 4823 4824 ldr q21, [x3, #144] //load h6k | h5k 
4825 ldr q24, [x3, #192] //load h8k | h7k 4826 aese v3.16b, v28.16b 4827 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 4828 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 4829 4830 aese v6.16b, v28.16b 4831 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 4832 rev64 v13.16b, v13.16b //GHASH block 8k+5 4833 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 4834 4835.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 4836 aese v4.16b, v28.16b 4837 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 4838 aese v5.16b, v28.16b 4839 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 4840 4841 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4842 aese v3.16b, v26.16b 4843 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 4844 aese v7.16b, v28.16b 4845 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 4846 4847 aese v0.16b, v28.16b 4848 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 4849 aese v2.16b, v28.16b 4850 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 4851 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 4852 4853 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 4854 aese v1.16b, v28.16b 4855 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 4856 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 4857 4858 aese v5.16b, v26.16b 4859 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 4860 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4861 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 4862 4863 aese v7.16b, v26.16b 4864 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 4865 aese v6.16b, v26.16b 4866 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 4867 aese v4.16b, v26.16b 4868 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 4869 4870.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 4871 ldp q27, q28, [x8, #64] //load rk4, rk5 4872 aese v0.16b, v26.16b 4873 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 4874 4875 ldr q23, [x3, #80] 
//load h3l | h3h 4876 ext v23.16b, v23.16b, v23.16b, #8 4877 ldr q25, [x3, #112] //load h4l | h4h 4878 ext v25.16b, v25.16b, v25.16b, #8 4879 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 4880 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 4881 4882 ldr q20, [x3, #32] //load h1l | h1h 4883 ext v20.16b, v20.16b, v20.16b, #8 4884 ldr q22, [x3, #64] //load h2l | h2h 4885 ext v22.16b, v22.16b, v22.16b, #8 4886 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 4887 aese v2.16b, v26.16b 4888 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 4889 4890 rev64 v15.16b, v15.16b //GHASH block 8k+7 4891 4892.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 4893 rev64 v12.16b, v12.16b //GHASH block 8k+4 4894 4895 aese v5.16b, v27.16b 4896 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 4897 aese v4.16b, v27.16b 4898 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 4899 aese v1.16b, v26.16b 4900 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 4901 4902 aese v2.16b, v27.16b 4903 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 4904 aese v0.16b, v27.16b 4905 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 4906 aese v3.16b, v27.16b 4907 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 4908 4909 aese v1.16b, v27.16b 4910 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 4911 aese v6.16b, v27.16b 4912 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 4913 aese v7.16b, v27.16b 4914 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 4915 4916 rev64 v14.16b, v14.16b //GHASH block 8k+6 4917 ldr q21, [x3, #48] //load h2k | h1k 4918 ldr q24, [x3, #96] //load h4k | h3k 4919 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4920 4921 aese v7.16b, v28.16b 4922 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 4923 aese v1.16b, v28.16b 4924 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 4925 aese v2.16b, v28.16b 4926 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 4927 4928 ldp q26, q27, [x8, #96] //load rk6, rk7 4929 aese 
v6.16b, v28.16b 4930 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 4931 aese v5.16b, v28.16b 4932 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 4933 4934 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 4935 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 4936 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 4937 4938 aese v4.16b, v28.16b 4939 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 4940 4941 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 4942 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 4943 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 4944 4945 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 4946 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4947 aese v0.16b, v28.16b 4948 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 4949 4950 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 4951 aese v3.16b, v28.16b 4952 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 4953 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4954 4955 aese v4.16b, v26.16b 4956 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 4957 aese v2.16b, v26.16b 4958 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 4959 4960 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 4961 aese v1.16b, v26.16b 4962 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 4963 aese v7.16b, v26.16b 4964 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 4965 4966 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 4967 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 4968 aese v0.16b, v26.16b 4969 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 4970 4971 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 4972 aese v5.16b, v26.16b 4973 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 4974 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 4975 4976.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 4977 aese v4.16b, v27.16b 4978 aesmc v4.16b, v4.16b //AES block 
8k+12 - round 7 4979.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 4980 4981 aese v3.16b, v26.16b 4982 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 4983 aese v6.16b, v26.16b 4984 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 4985 aese v5.16b, v27.16b 4986 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 4987 4988 ldp q28, q26, [x8, #128] //load rk8, rk9 4989 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 4990 aese v2.16b, v27.16b 4991 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 4992 4993 ldr d16, [x10] //MODULO - load modulo constant 4994.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 4995 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 4996 4997 aese v1.16b, v27.16b 4998 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 4999 aese v7.16b, v27.16b 5000 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 5001 aese v6.16b, v27.16b 5002 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 5003 5004.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 5005.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 5006.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 5007 5008 aese v0.16b, v27.16b 5009 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 5010 aese v3.16b, v27.16b 5011 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 5012 5013.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 5014 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 5015 aese v2.16b, v28.16b 5016 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 5017 5018 aese v6.16b, v28.16b 5019 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 5020 aese v7.16b, v28.16b 5021 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 5022 aese v1.16b, v28.16b 5023 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 5024 5025 aese v3.16b, v28.16b 5026 aesmc v3.16b, v3.16b 
//AES block 8k+11 - round 8 5027 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 5028 aese v0.16b, v28.16b 5029 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 5030 5031 aese v5.16b, v28.16b 5032 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 5033 aese v4.16b, v28.16b 5034 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 5035 ldp q27, q28, [x8, #160] //load rk10, rk11 5036 5037.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 5038 aese v7.16b, v26.16b 5039 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 5040 aese v6.16b, v26.16b 5041 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 5042 5043 aese v5.16b, v26.16b 5044 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 5045 aese v2.16b, v26.16b 5046 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 5047 aese v3.16b, v26.16b 5048 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 5049 5050 aese v0.16b, v26.16b 5051 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 5052 aese v1.16b, v26.16b 5053 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 5054 aese v4.16b, v26.16b 5055 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 5056 5057 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 5058 ldr q26, [x8, #192] //load rk12 5059 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 5060 5061 aese v2.16b, v27.16b 5062 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 5063 aese v5.16b, v27.16b 5064 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 5065 aese v0.16b, v27.16b 5066 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 5067 5068 aese v4.16b, v27.16b 5069 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 5070 aese v6.16b, v27.16b 5071 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 5072 aese v7.16b, v27.16b 5073 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 5074 5075 aese v0.16b, v28.16b //AES block 8k+8 - round 11 5076.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 5077 aese v5.16b, v28.16b //AES block 8k+13 - 
round 11 5078 5079 aese v2.16b, v28.16b //AES block 8k+10 - round 11 5080 aese v3.16b, v27.16b 5081 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 5082 aese v1.16b, v27.16b 5083 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 5084 5085 aese v6.16b, v28.16b //AES block 8k+14 - round 11 5086 aese v4.16b, v28.16b //AES block 8k+12 - round 11 5087 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 5088 5089 aese v3.16b, v28.16b //AES block 8k+11 - round 11 5090 aese v1.16b, v28.16b //AES block 8k+9 - round 11 5091 aese v7.16b, v28.16b //AES block 8k+15 - round 11 5092 5093.L192_dec_tail: //TAIL 5094 5095 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 5096 5097 ldp q20, q21, [x3, #128] //load h5l | h5h 5098 ext v20.16b, v20.16b, v20.16b, #8 5099 ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 5100 5101 ldp q24, q25, [x3, #192] //load h8k | h7k 5102 ext v25.16b, v25.16b, v25.16b, #8 5103 5104 mov v29.16b, v26.16b 5105 5106 ldp q22, q23, [x3, #160] //load h6l | h6h 5107 ext v22.16b, v22.16b, v22.16b, #8 5108 ext v23.16b, v23.16b, v23.16b, #8 5109 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 5110 5111.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 5112 cmp x5, #112 5113 b.gt .L192_dec_blocks_more_than_7 5114 5115 mov v7.16b, v6.16b 5116 movi v17.8b, #0 5117 sub v30.4s, v30.4s, v31.4s 5118 5119 mov v6.16b, v5.16b 5120 mov v5.16b, v4.16b 5121 mov v4.16b, v3.16b 5122 5123 cmp x5, #96 5124 movi v19.8b, #0 5125 mov v3.16b, v2.16b 5126 5127 mov v2.16b, v1.16b 5128 movi v18.8b, #0 5129 b.gt .L192_dec_blocks_more_than_6 5130 5131 mov v7.16b, v6.16b 5132 mov v6.16b, v5.16b 5133 mov v5.16b, v4.16b 5134 5135 mov v4.16b, v3.16b 5136 mov v3.16b, v1.16b 5137 5138 sub v30.4s, v30.4s, v31.4s 5139 cmp x5, #80 5140 b.gt .L192_dec_blocks_more_than_5 5141 5142 mov v7.16b, v6.16b 5143 mov v6.16b, v5.16b 5144 5145 mov v5.16b, v4.16b 5146 mov v4.16b, v1.16b 5147 cmp x5, #64 5148 5149 sub v30.4s, v30.4s, v31.4s 5150 
b.gt .L192_dec_blocks_more_than_4 5151 5152 sub v30.4s, v30.4s, v31.4s 5153 mov v7.16b, v6.16b 5154 mov v6.16b, v5.16b 5155 5156 mov v5.16b, v1.16b 5157 cmp x5, #48 5158 b.gt .L192_dec_blocks_more_than_3 5159 5160 sub v30.4s, v30.4s, v31.4s 5161 mov v7.16b, v6.16b 5162 cmp x5, #32 5163 5164 mov v6.16b, v1.16b 5165 ldr q24, [x3, #96] //load h4k | h3k 5166 b.gt .L192_dec_blocks_more_than_2 5167 5168 sub v30.4s, v30.4s, v31.4s 5169 5170 mov v7.16b, v1.16b 5171 cmp x5, #16 5172 b.gt .L192_dec_blocks_more_than_1 5173 5174 sub v30.4s, v30.4s, v31.4s 5175 ldr q21, [x3, #48] //load h2k | h1k 5176 b .L192_dec_blocks_less_than_1 5177.L192_dec_blocks_more_than_7: //blocks left > 7 5178 rev64 v8.16b, v9.16b //GHASH final-7 block 5179 5180 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 5181 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5182 5183 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 5184 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 5185 ldr q9, [x0], #16 //AES final-6 block - load ciphertext 5186 5187 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 5188 5189 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 5190 st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 5191 5192.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 5193 5194 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 5195 movi v16.8b, #0 //suppress further partial tag feed in 5196.L192_dec_blocks_more_than_6: //blocks left > 6 5197 5198 rev64 v8.16b, v9.16b //GHASH final-6 block 5199 5200 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5201 5202 ldr q9, [x0], #16 //AES final-5 block - load ciphertext 5203 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 5204 5205 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 5206 movi v16.8b, #0 //suppress further partial tag feed in 5207 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 5208 5209 st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 5210.inst 
0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 5211 5212 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 5213 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 5214 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 5215 5216 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 5217 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 5218.L192_dec_blocks_more_than_5: //blocks left > 5 5219 5220 rev64 v8.16b, v9.16b //GHASH final-5 block 5221 5222 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5223 5224 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 5225 5226 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 5227 5228 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 5229 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 5230 5231 ldr q9, [x0], #16 //AES final-4 block - load ciphertext 5232 5233 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 5234 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 5235 5236 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 5237 5238 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 5239 movi v16.8b, #0 //suppress further partial tag feed in 5240 st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 5241 5242 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 5243.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 5244.L192_dec_blocks_more_than_4: //blocks left > 4 5245 5246 rev64 v8.16b, v9.16b //GHASH final-4 block 5247 5248 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5249 movi v16.8b, #0 //suppress further partial tag feed in 5250 5251 ldr q9, [x0], #16 //AES final-3 block - load ciphertext 5252 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 5253 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 5254 5255 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 5256 5257 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 5258 5259 pmull v27.1q, 
v27.1d, v21.1d //GHASH final-4 block - mid 5260 st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 5261 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 5262 5263.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 5264 5265 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 5266 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 5267.L192_dec_blocks_more_than_3: //blocks left > 3 5268 5269 ldr q25, [x3, #112] //load h4l | h4h 5270 ext v25.16b, v25.16b, v25.16b, #8 5271 rev64 v8.16b, v9.16b //GHASH final-3 block 5272 ldr q9, [x0], #16 //AES final-2 block - load ciphertext 5273 5274 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5275 5276 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 5277 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 5278 5279 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 5280 movi v16.8b, #0 //suppress further partial tag feed in 5281 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 5282 5283 st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 5284 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 5285.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 5286 5287 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 5288 ldr q24, [x3, #96] //load h4k | h3k 5289 5290 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 5291 5292 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 5293 5294 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 5295.L192_dec_blocks_more_than_2: //blocks left > 2 5296 5297 rev64 v8.16b, v9.16b //GHASH final-2 block 5298 ldr q23, [x3, #80] //load h3l | h3h 5299 ext v23.16b, v23.16b, v23.16b, #8 5300 5301 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5302 5303 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 5304 ldr q9, [x0], #16 //AES final-1 block - load ciphertext 5305 5306 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 5307 5308 eor 
v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 5309 5310 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 5311 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 5312 5313 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 5314 movi v16.8b, #0 //suppress further partial tag feed in 5315 5316 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 5317 st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 5318 5319 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 5320.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 5321.L192_dec_blocks_more_than_1: //blocks left > 1 5322 5323 rev64 v8.16b, v9.16b //GHASH final-1 block 5324 ldr q9, [x0], #16 //AES final block - load ciphertext 5325 ldr q22, [x3, #64] //load h1l | h1h 5326 ext v22.16b, v22.16b, v22.16b, #8 5327 5328 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5329 movi v16.8b, #0 //suppress further partial tag feed in 5330 ldr q21, [x3, #48] //load h2k | h1k 5331 5332 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 5333 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 5334 st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 5335 5336 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 5337 5338.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 5339 5340 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 5341 5342 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 5343 5344 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 5345 5346 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 5347 5348 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 5349 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 5350.L192_dec_blocks_less_than_1: //blocks left <= 1 5351 5352 rev32 v30.16b, v30.16b 5353 and x1, x1, #127 //bit_length %= 128 5354 5355 sub x1, x1, #128 //bit_length -= 128 5356 str q30, [x16] //store the updated counter 5357 5358 neg x1, x1 
//bit_length = 128 - #bits in input (in range [1,128]) 5359 mvn x6, xzr //temp0_x = 0xffffffffffffffff 5360 5361 and x1, x1, #127 //bit_length %= 128 5362 5363 mvn x7, xzr //temp1_x = 0xffffffffffffffff 5364 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 5365 cmp x1, #64 5366 5367 csel x13, x7, x6, lt 5368 csel x14, x6, xzr, lt 5369 ldr q20, [x3, #32] //load h1l | h1h 5370 ext v20.16b, v20.16b, v20.16b, #8 5371 5372 mov v0.d[1], x14 5373 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 5374 5375 mov v0.d[0], x13 //ctr0b is mask for last block 5376 5377 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 5378 bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 5379 5380 rev64 v8.16b, v9.16b //GHASH final block 5381 5382 st1 { v12.16b}, [x2] //store all 16B 5383 5384 eor v8.16b, v8.16b, v16.16b //feed in partial tag 5385 5386 ins v16.d[0], v8.d[1] //GHASH final block - mid 5387 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 5388 5389 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 5390 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 5391 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 5392 5393 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 5394 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 5395 5396 eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 5397 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 5398 ldr d16, [x10] //MODULO - load modulo constant 5399 5400 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 5401 ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 5402 5403 eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 5404 5405.inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid 5406 5407 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 5408 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other 
mid alignment 5409 5410.inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low 5411 ext v19.16b, v19.16b, v19.16b, #8 5412 rev64 v19.16b, v19.16b 5413 st1 { v19.16b }, [x3] 5414 5415 mov x0, x9 5416 5417 ldp d10, d11, [sp, #16] 5418 ldp d12, d13, [sp, #32] 5419 ldp d14, d15, [sp, #48] 5420 ldp d8, d9, [sp], #80 5421 ret 5422 5423.L192_dec_ret: 5424 mov w0, #0x0 5425 ret 5426.size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel 5427.globl unroll8_eor3_aes_gcm_enc_256_kernel 5428.type unroll8_eor3_aes_gcm_enc_256_kernel,%function 5429.align 4 5430unroll8_eor3_aes_gcm_enc_256_kernel: 5431 AARCH64_VALID_CALL_TARGET 5432 cbz x1, .L256_enc_ret 5433 stp d8, d9, [sp, #-80]! 5434 lsr x9, x1, #3 5435 mov x16, x4 5436 mov x8, x5 5437 stp d10, d11, [sp, #16] 5438 stp d12, d13, [sp, #32] 5439 stp d14, d15, [sp, #48] 5440 mov x5, #0xc200000000000000 5441 stp x5, xzr, [sp, #64] 5442 add x10, sp, #64 5443 5444 ld1 { v0.16b}, [x16] //CTR block 0 5445 5446 mov x5, x9 5447 5448 mov x15, #0x100000000 //set up counter increment 5449 movi v31.16b, #0x0 5450 mov v31.d[1], x15 5451 sub x5, x5, #1 //byte_len - 1 5452 5453 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 5454 5455 add x5, x5, x0 5456 5457 rev32 v30.16b, v0.16b //set up reversed counter 5458 5459 add v30.4s, v30.4s, v31.4s //CTR block 0 5460 5461 rev32 v1.16b, v30.16b //CTR block 1 5462 add v30.4s, v30.4s, v31.4s //CTR block 1 5463 5464 rev32 v2.16b, v30.16b //CTR block 2 5465 add v30.4s, v30.4s, v31.4s //CTR block 2 5466 5467 rev32 v3.16b, v30.16b //CTR block 3 5468 add v30.4s, v30.4s, v31.4s //CTR block 3 5469 5470 rev32 v4.16b, v30.16b //CTR block 4 5471 add v30.4s, v30.4s, v31.4s //CTR block 4 5472 5473 rev32 v5.16b, v30.16b //CTR block 5 5474 add v30.4s, v30.4s, v31.4s //CTR block 5 5475 ldp q26, q27, [x8, #0] //load rk0, rk1 5476 5477 rev32 v6.16b, v30.16b //CTR block 6 5478 add v30.4s, v30.4s, 
v31.4s //CTR block 6 5479 5480 rev32 v7.16b, v30.16b //CTR block 7 5481 5482 aese v3.16b, v26.16b 5483 aesmc v3.16b, v3.16b //AES block 3 - round 0 5484 aese v4.16b, v26.16b 5485 aesmc v4.16b, v4.16b //AES block 4 - round 0 5486 aese v2.16b, v26.16b 5487 aesmc v2.16b, v2.16b //AES block 2 - round 0 5488 5489 aese v0.16b, v26.16b 5490 aesmc v0.16b, v0.16b //AES block 0 - round 0 5491 aese v1.16b, v26.16b 5492 aesmc v1.16b, v1.16b //AES block 1 - round 0 5493 aese v6.16b, v26.16b 5494 aesmc v6.16b, v6.16b //AES block 6 - round 0 5495 5496 aese v5.16b, v26.16b 5497 aesmc v5.16b, v5.16b //AES block 5 - round 0 5498 aese v7.16b, v26.16b 5499 aesmc v7.16b, v7.16b //AES block 7 - round 0 5500 ldp q28, q26, [x8, #32] //load rk2, rk3 5501 5502 aese v4.16b, v27.16b 5503 aesmc v4.16b, v4.16b //AES block 4 - round 1 5504 aese v1.16b, v27.16b 5505 aesmc v1.16b, v1.16b //AES block 1 - round 1 5506 aese v3.16b, v27.16b 5507 aesmc v3.16b, v3.16b //AES block 3 - round 1 5508 5509 aese v6.16b, v27.16b 5510 aesmc v6.16b, v6.16b //AES block 6 - round 1 5511 aese v5.16b, v27.16b 5512 aesmc v5.16b, v5.16b //AES block 5 - round 1 5513 5514 aese v2.16b, v27.16b 5515 aesmc v2.16b, v2.16b //AES block 2 - round 1 5516 5517 aese v7.16b, v27.16b 5518 aesmc v7.16b, v7.16b //AES block 7 - round 1 5519 5520 aese v2.16b, v28.16b 5521 aesmc v2.16b, v2.16b //AES block 2 - round 2 5522 aese v3.16b, v28.16b 5523 aesmc v3.16b, v3.16b //AES block 3 - round 2 5524 aese v0.16b, v27.16b 5525 aesmc v0.16b, v0.16b //AES block 0 - round 1 5526 5527 aese v7.16b, v28.16b 5528 aesmc v7.16b, v7.16b //AES block 7 - round 2 5529 aese v6.16b, v28.16b 5530 aesmc v6.16b, v6.16b //AES block 6 - round 2 5531 aese v5.16b, v28.16b 5532 aesmc v5.16b, v5.16b //AES block 5 - round 2 5533 5534 aese v4.16b, v28.16b 5535 aesmc v4.16b, v4.16b //AES block 4 - round 2 5536 aese v0.16b, v28.16b 5537 aesmc v0.16b, v0.16b //AES block 0 - round 2 5538 aese v1.16b, v28.16b 5539 aesmc v1.16b, v1.16b //AES block 1 - round 2 5540 5541 
aese v5.16b, v26.16b 5542 aesmc v5.16b, v5.16b //AES block 5 - round 3 5543 aese v3.16b, v26.16b 5544 aesmc v3.16b, v3.16b //AES block 3 - round 3 5545 ldp q27, q28, [x8, #64] //load rk4, rk5 5546 5547 aese v4.16b, v26.16b 5548 aesmc v4.16b, v4.16b //AES block 4 - round 3 5549 5550 aese v1.16b, v26.16b 5551 aesmc v1.16b, v1.16b //AES block 1 - round 3 5552 aese v6.16b, v26.16b 5553 aesmc v6.16b, v6.16b //AES block 6 - round 3 5554 aese v7.16b, v26.16b 5555 aesmc v7.16b, v7.16b //AES block 7 - round 3 5556 5557 aese v2.16b, v26.16b 5558 aesmc v2.16b, v2.16b //AES block 2 - round 3 5559 aese v0.16b, v26.16b 5560 aesmc v0.16b, v0.16b //AES block 0 - round 3 5561 5562 aese v4.16b, v27.16b 5563 aesmc v4.16b, v4.16b //AES block 4 - round 4 5564 aese v6.16b, v27.16b 5565 aesmc v6.16b, v6.16b //AES block 6 - round 4 5566 aese v1.16b, v27.16b 5567 aesmc v1.16b, v1.16b //AES block 1 - round 4 5568 5569 aese v2.16b, v27.16b 5570 aesmc v2.16b, v2.16b //AES block 2 - round 4 5571 aese v0.16b, v27.16b 5572 aesmc v0.16b, v0.16b //AES block 0 - round 4 5573 5574 aese v3.16b, v27.16b 5575 aesmc v3.16b, v3.16b //AES block 3 - round 4 5576 aese v7.16b, v27.16b 5577 aesmc v7.16b, v7.16b //AES block 7 - round 4 5578 aese v5.16b, v27.16b 5579 aesmc v5.16b, v5.16b //AES block 5 - round 4 5580 5581 aese v0.16b, v28.16b 5582 aesmc v0.16b, v0.16b //AES block 0 - round 5 5583 aese v2.16b, v28.16b 5584 aesmc v2.16b, v2.16b //AES block 2 - round 5 5585 ldp q26, q27, [x8, #96] //load rk6, rk7 5586 5587 aese v1.16b, v28.16b 5588 aesmc v1.16b, v1.16b //AES block 1 - round 5 5589 aese v4.16b, v28.16b 5590 aesmc v4.16b, v4.16b //AES block 4 - round 5 5591 aese v5.16b, v28.16b 5592 aesmc v5.16b, v5.16b //AES block 5 - round 5 5593 5594 aese v3.16b, v28.16b 5595 aesmc v3.16b, v3.16b //AES block 3 - round 5 5596 aese v6.16b, v28.16b 5597 aesmc v6.16b, v6.16b //AES block 6 - round 5 5598 aese v7.16b, v28.16b 5599 aesmc v7.16b, v7.16b //AES block 7 - round 5 5600 5601 aese v1.16b, v26.16b 5602 aesmc 
v1.16b, v1.16b //AES block 1 - round 6 5603 aese v5.16b, v26.16b 5604 aesmc v5.16b, v5.16b //AES block 5 - round 6 5605 aese v4.16b, v26.16b 5606 aesmc v4.16b, v4.16b //AES block 4 - round 6 5607 5608 aese v2.16b, v26.16b 5609 aesmc v2.16b, v2.16b //AES block 2 - round 6 5610 aese v6.16b, v26.16b 5611 aesmc v6.16b, v6.16b //AES block 6 - round 6 5612 aese v0.16b, v26.16b 5613 aesmc v0.16b, v0.16b //AES block 0 - round 6 5614 5615 aese v7.16b, v26.16b 5616 aesmc v7.16b, v7.16b //AES block 7 - round 6 5617 aese v3.16b, v26.16b 5618 aesmc v3.16b, v3.16b //AES block 3 - round 6 5619 ldp q28, q26, [x8, #128] //load rk8, rk9 5620 5621 aese v2.16b, v27.16b 5622 aesmc v2.16b, v2.16b //AES block 2 - round 7 5623 aese v0.16b, v27.16b 5624 aesmc v0.16b, v0.16b //AES block 0 - round 7 5625 5626 aese v7.16b, v27.16b 5627 aesmc v7.16b, v7.16b //AES block 7 - round 7 5628 aese v6.16b, v27.16b 5629 aesmc v6.16b, v6.16b //AES block 6 - round 7 5630 aese v1.16b, v27.16b 5631 aesmc v1.16b, v1.16b //AES block 1 - round 7 5632 5633 aese v5.16b, v27.16b 5634 aesmc v5.16b, v5.16b //AES block 5 - round 7 5635 aese v3.16b, v27.16b 5636 aesmc v3.16b, v3.16b //AES block 3 - round 7 5637 5638 aese v4.16b, v27.16b 5639 aesmc v4.16b, v4.16b //AES block 4 - round 7 5640 5641 aese v6.16b, v28.16b 5642 aesmc v6.16b, v6.16b //AES block 6 - round 8 5643 aese v1.16b, v28.16b 5644 aesmc v1.16b, v1.16b //AES block 1 - round 8 5645 5646 aese v3.16b, v28.16b 5647 aesmc v3.16b, v3.16b //AES block 3 - round 8 5648 aese v0.16b, v28.16b 5649 aesmc v0.16b, v0.16b //AES block 0 - round 8 5650 aese v7.16b, v28.16b 5651 aesmc v7.16b, v7.16b //AES block 7 - round 8 5652 5653 aese v5.16b, v28.16b 5654 aesmc v5.16b, v5.16b //AES block 5 - round 8 5655 aese v4.16b, v28.16b 5656 aesmc v4.16b, v4.16b //AES block 4 - round 8 5657 aese v2.16b, v28.16b 5658 aesmc v2.16b, v2.16b //AES block 2 - round 8 5659 5660 ld1 { v19.16b}, [x3] 5661 ext v19.16b, v19.16b, v19.16b, #8 5662 rev64 v19.16b, v19.16b 5663 ldp q27, q28, [x8, 
#160] //load rk10, rk11 5664 5665 aese v6.16b, v26.16b 5666 aesmc v6.16b, v6.16b //AES block 6 - round 9 5667 aese v7.16b, v26.16b 5668 aesmc v7.16b, v7.16b //AES block 7 - round 9 5669 aese v3.16b, v26.16b 5670 aesmc v3.16b, v3.16b //AES block 3 - round 9 5671 5672 aese v4.16b, v26.16b 5673 aesmc v4.16b, v4.16b //AES block 4 - round 9 5674 aese v5.16b, v26.16b 5675 aesmc v5.16b, v5.16b //AES block 5 - round 9 5676 aese v2.16b, v26.16b 5677 aesmc v2.16b, v2.16b //AES block 2 - round 9 5678 5679 aese v1.16b, v26.16b 5680 aesmc v1.16b, v1.16b //AES block 1 - round 9 5681 5682 aese v7.16b, v27.16b 5683 aesmc v7.16b, v7.16b //AES block 7 - round 10 5684 aese v4.16b, v27.16b 5685 aesmc v4.16b, v4.16b //AES block 4 - round 10 5686 aese v0.16b, v26.16b 5687 aesmc v0.16b, v0.16b //AES block 0 - round 9 5688 5689 aese v1.16b, v27.16b 5690 aesmc v1.16b, v1.16b //AES block 1 - round 10 5691 aese v5.16b, v27.16b 5692 aesmc v5.16b, v5.16b //AES block 5 - round 10 5693 aese v3.16b, v27.16b 5694 aesmc v3.16b, v3.16b //AES block 3 - round 10 5695 5696 aese v2.16b, v27.16b 5697 aesmc v2.16b, v2.16b //AES block 2 - round 10 5698 aese v0.16b, v27.16b 5699 aesmc v0.16b, v0.16b //AES block 0 - round 10 5700 aese v6.16b, v27.16b 5701 aesmc v6.16b, v6.16b //AES block 6 - round 10 5702 5703 aese v4.16b, v28.16b 5704 aesmc v4.16b, v4.16b //AES block 4 - round 11 5705 ldp q26, q27, [x8, #192] //load rk12, rk13 5706 aese v5.16b, v28.16b 5707 aesmc v5.16b, v5.16b //AES block 5 - round 11 5708 5709 aese v2.16b, v28.16b 5710 aesmc v2.16b, v2.16b //AES block 2 - round 11 5711 aese v6.16b, v28.16b 5712 aesmc v6.16b, v6.16b //AES block 6 - round 11 5713 aese v1.16b, v28.16b 5714 aesmc v1.16b, v1.16b //AES block 1 - round 11 5715 5716 aese v0.16b, v28.16b 5717 aesmc v0.16b, v0.16b //AES block 0 - round 11 5718 aese v3.16b, v28.16b 5719 aesmc v3.16b, v3.16b //AES block 3 - round 11 5720 aese v7.16b, v28.16b 5721 aesmc v7.16b, v7.16b //AES block 7 - round 11 5722 5723 add v30.4s, v30.4s, v31.4s //CTR 
block 7 5724 ldr q28, [x8, #224] //load rk14 5725 5726 aese v4.16b, v26.16b 5727 aesmc v4.16b, v4.16b //AES block 4 - round 12 5728 aese v2.16b, v26.16b 5729 aesmc v2.16b, v2.16b //AES block 2 - round 12 5730 aese v1.16b, v26.16b 5731 aesmc v1.16b, v1.16b //AES block 1 - round 12 5732 5733 aese v0.16b, v26.16b 5734 aesmc v0.16b, v0.16b //AES block 0 - round 12 5735 aese v5.16b, v26.16b 5736 aesmc v5.16b, v5.16b //AES block 5 - round 12 5737 aese v3.16b, v26.16b 5738 aesmc v3.16b, v3.16b //AES block 3 - round 12 5739 5740 aese v2.16b, v27.16b //AES block 2 - round 13 5741 aese v1.16b, v27.16b //AES block 1 - round 13 5742 aese v4.16b, v27.16b //AES block 4 - round 13 5743 5744 aese v6.16b, v26.16b 5745 aesmc v6.16b, v6.16b //AES block 6 - round 12 5746 aese v7.16b, v26.16b 5747 aesmc v7.16b, v7.16b //AES block 7 - round 12 5748 5749 aese v0.16b, v27.16b //AES block 0 - round 13 5750 aese v5.16b, v27.16b //AES block 5 - round 13 5751 5752 aese v6.16b, v27.16b //AES block 6 - round 13 5753 aese v7.16b, v27.16b //AES block 7 - round 13 5754 aese v3.16b, v27.16b //AES block 3 - round 13 5755 5756 add x4, x0, x1, lsr #3 //end_input_ptr 5757 cmp x0, x5 //check if we have <= 8 blocks 5758 b.ge .L256_enc_tail //handle tail 5759 5760 ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext 5761 5762 ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext 5763 5764.inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result 5765 rev32 v0.16b, v30.16b //CTR block 8 5766 add v30.4s, v30.4s, v31.4s //CTR block 8 5767 5768.inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result 5769.inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result 5770 5771 rev32 v1.16b, v30.16b //CTR block 9 5772 add v30.4s, v30.4s, v31.4s //CTR block 9 5773 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 5774 5775 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 5776.inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, 
v28.16b //AES block 2 - result 5777 cmp x0, x5 //check if we have <= 8 blocks 5778 5779 rev32 v2.16b, v30.16b //CTR block 10 5780 add v30.4s, v30.4s, v31.4s //CTR block 10 5781 stp q8, q9, [x2], #32 //AES block 0, 1 - store result 5782 5783 stp q10, q11, [x2], #32 //AES block 2, 3 - store result 5784 5785 rev32 v3.16b, v30.16b //CTR block 11 5786 add v30.4s, v30.4s, v31.4s //CTR block 11 5787 5788.inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 5789 5790.inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 5791.inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 5792.inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 5793 5794 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 5795 rev32 v4.16b, v30.16b //CTR block 12 5796 5797 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 5798 add v30.4s, v30.4s, v31.4s //CTR block 12 5799 b.ge .L256_enc_prepretail //do prepretail 5800 5801.L256_enc_main_loop: //main loop start 5802 ldp q26, q27, [x8, #0] //load rk0, rk1 5803 5804 rev32 v5.16b, v30.16b //CTR block 8k+13 5805 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 5806 ldr q21, [x3, #144] //load h6k | h5k 5807 ldr q24, [x3, #192] //load h8k | h7k 5808 5809 rev64 v11.16b, v11.16b //GHASH block 8k+3 5810 ldr q20, [x3, #128] //load h5l | h5h 5811 ext v20.16b, v20.16b, v20.16b, #8 5812 ldr q22, [x3, #160] //load h6l | h6h 5813 ext v22.16b, v22.16b, v22.16b, #8 5814 rev64 v9.16b, v9.16b //GHASH block 8k+1 5815 5816 rev32 v6.16b, v30.16b //CTR block 8k+14 5817 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 5818 rev64 v8.16b, v8.16b //GHASH block 8k 5819 5820 rev64 v12.16b, v12.16b //GHASH block 8k+4 5821 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 5822 ldr q23, [x3, #176] //load h7l | h7h 5823 ext v23.16b, v23.16b, v23.16b, #8 5824 ldr q25, [x3, #208] //load h8l | h8h 5825 ext v25.16b, v25.16b, v25.16b, #8 5826 5827 aese v3.16b, v26.16b 5828 aesmc v3.16b, 
v3.16b //AES block 8k+11 - round 0 5829 aese v5.16b, v26.16b 5830 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 5831 rev32 v7.16b, v30.16b //CTR block 8k+15 5832 5833 aese v0.16b, v26.16b 5834 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 5835 aese v1.16b, v26.16b 5836 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 5837 aese v6.16b, v26.16b 5838 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 5839 5840 aese v7.16b, v26.16b 5841 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 5842 aese v2.16b, v26.16b 5843 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 5844 aese v4.16b, v26.16b 5845 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 5846 5847 ldp q28, q26, [x8, #32] //load rk2, rk3 5848 eor v8.16b, v8.16b, v19.16b //PRE 1 5849 aese v6.16b, v27.16b 5850 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 5851 5852 aese v2.16b, v27.16b 5853 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 5854 aese v1.16b, v27.16b 5855 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 5856 aese v0.16b, v27.16b 5857 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 5858 5859 aese v4.16b, v27.16b 5860 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 5861 aese v3.16b, v27.16b 5862 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 5863 aese v5.16b, v27.16b 5864 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 5865 5866 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 5867 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 5868 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 5869 5870 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 5871 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 5872 aese v7.16b, v27.16b 5873 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 5874 5875 aese v1.16b, v28.16b 5876 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 5877 aese v5.16b, v28.16b 5878 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 5879 aese v6.16b, v28.16b 5880 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 5881 5882 aese v2.16b, v28.16b 5883 aesmc v2.16b, v2.16b //AES 
block 8k+10 - round 2 5884 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 5885 aese v4.16b, v28.16b 5886 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 5887 5888 aese v5.16b, v26.16b 5889 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 5890 aese v6.16b, v26.16b 5891 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 5892 aese v0.16b, v28.16b 5893 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 5894 5895 aese v1.16b, v26.16b 5896 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 5897 aese v7.16b, v28.16b 5898 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 5899 aese v3.16b, v28.16b 5900 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 5901 5902 aese v4.16b, v26.16b 5903 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 5904 rev64 v14.16b, v14.16b //GHASH block 8k+6 5905 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 5906 5907 aese v3.16b, v26.16b 5908 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 5909 ldp q27, q28, [x8, #64] //load rk4, rk5 5910 rev64 v10.16b, v10.16b //GHASH block 8k+2 5911 5912 aese v2.16b, v26.16b 5913 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 5914 aese v7.16b, v26.16b 5915 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 5916 aese v0.16b, v26.16b 5917 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 5918 5919 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 5920 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 5921 rev64 v13.16b, v13.16b //GHASH block 8k+5 5922 5923 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 5924 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 5925 ldr q23, [x3, #80] //load h3l | h3h 5926 ext v23.16b, v23.16b, v23.16b, #8 5927 ldr q25, [x3, #112] //load h4l | h4h 5928 ext v25.16b, v25.16b, v25.16b, #8 5929 5930 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 5931.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 5932 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 5933 5934 aese v4.16b, v27.16b 5935 aesmc v4.16b, v4.16b 
//AES block 8k+12 - round 4 5936 aese v1.16b, v27.16b 5937 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 5938 aese v5.16b, v27.16b 5939 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 5940 5941 aese v7.16b, v27.16b 5942 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 5943 aese v3.16b, v27.16b 5944 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 5945 aese v2.16b, v27.16b 5946 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 5947 5948 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 5949 aese v6.16b, v27.16b 5950 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 5951 aese v0.16b, v27.16b 5952 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 5953 5954 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 5955 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 5956 ldp q26, q27, [x8, #96] //load rk6, rk7 5957 5958 aese v5.16b, v28.16b 5959 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 5960 aese v7.16b, v28.16b 5961 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 5962 aese v4.16b, v28.16b 5963 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 5964 5965 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 5966 aese v2.16b, v28.16b 5967 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 5968 rev64 v15.16b, v15.16b //GHASH block 8k+7 5969 5970 aese v3.16b, v28.16b 5971 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 5972 aese v6.16b, v28.16b 5973 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 5974 aese v1.16b, v28.16b 5975 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 5976 5977 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 5978 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 5979 aese v0.16b, v28.16b 5980 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 5981 5982 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 5983 aese v4.16b, v26.16b 5984 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 5985 aese v2.16b, v26.16b 5986 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 5987 5988 aese v6.16b, v26.16b 5989 aesmc v6.16b, 
v6.16b //AES block 8k+14 - round 6 5990 aese v1.16b, v26.16b 5991 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 5992 aese v7.16b, v26.16b 5993 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 5994 5995 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 5996 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 5997 aese v5.16b, v26.16b 5998 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 5999 6000.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 6001 aese v3.16b, v26.16b 6002 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 6003 aese v0.16b, v26.16b 6004 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 6005 6006 ldp q28, q26, [x8, #128] //load rk8, rk9 6007 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 6008 aese v5.16b, v27.16b 6009 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 6010 6011 ldr q20, [x3, #32] //load h1l | h1h 6012 ext v20.16b, v20.16b, v20.16b, #8 6013 ldr q22, [x3, #64] //load h2l | h2h 6014 ext v22.16b, v22.16b, v22.16b, #8 6015 aese v2.16b, v27.16b 6016 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 6017.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6018 6019 ldr q21, [x3, #48] //load h2k | h1k 6020 ldr q24, [x3, #96] //load h4k | h3k 6021 aese v6.16b, v27.16b 6022 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 6023 aese v3.16b, v27.16b 6024 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 6025 6026 aese v0.16b, v27.16b 6027 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 6028 aese v7.16b, v27.16b 6029 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 6030 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 6031 6032 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6033 aese v4.16b, v27.16b 6034 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 6035 aese v1.16b, v27.16b 6036 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 6037 6038 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 6039 aese v7.16b, v28.16b 6040 aesmc v7.16b, 
v7.16b //AES block 8k+15 - round 8 6041 aese v0.16b, v28.16b 6042 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 6043 6044 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 6045 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6046 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6047 6048 aese v3.16b, v28.16b 6049 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 6050 aese v0.16b, v26.16b 6051 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 6052 aese v1.16b, v28.16b 6053 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 6054 6055 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 6056 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 6057 aese v2.16b, v28.16b 6058 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 6059 6060 aese v5.16b, v28.16b 6061 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 6062 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 6063 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 6064 6065 aese v6.16b, v28.16b 6066 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 6067 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6068 aese v4.16b, v28.16b 6069 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 6070 6071.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6072 aese v7.16b, v26.16b 6073 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 6074 aese v5.16b, v26.16b 6075 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 6076 6077 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6078 aese v6.16b, v26.16b 6079 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 6080 aese v4.16b, v26.16b 6081 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 6082 6083 ldp q27, q28, [x8, #160] //load rk10, rk11 6084 aese v2.16b, v26.16b 6085 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 6086 aese v3.16b, v26.16b 6087 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 6088 6089 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 6090.inst 0xce195e73 //eor3 v19.16b, 
v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 6091 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 6092 6093 ldr d16, [x10] //MODULO - load modulo constant 6094 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 6095 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 6096 6097 aese v1.16b, v26.16b 6098 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 6099 6100.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6101.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 6102.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 6103 6104 aese v4.16b, v27.16b 6105 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 6106 aese v3.16b, v27.16b 6107 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 6108 aese v5.16b, v27.16b 6109 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 6110 6111 aese v0.16b, v27.16b 6112 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 6113 aese v2.16b, v27.16b 6114 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 6115 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 6116 6117 aese v1.16b, v27.16b 6118 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 6119 aese v7.16b, v27.16b 6120 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 6121 aese v6.16b, v27.16b 6122 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 6123 6124.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 6125 6126 ldp q26, q27, [x8, #192] //load rk12, rk13 6127 rev32 v20.16b, v30.16b //CTR block 8k+16 6128 6129 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6130 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext 6131 aese v2.16b, v28.16b 6132 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 6133 6134 aese v6.16b, v28.16b 6135 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 6136 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 6137 aese v3.16b, v28.16b 6138 aesmc v3.16b, v3.16b //AES block 
8k+11 - round 11 6139 6140 aese v0.16b, v28.16b 6141 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 6142 aese v7.16b, v28.16b 6143 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 6144 6145 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6146 aese v1.16b, v28.16b 6147 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 6148 6149 aese v7.16b, v26.16b 6150 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 6151 aese v5.16b, v28.16b 6152 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 6153 6154 aese v3.16b, v26.16b 6155 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 6156 aese v6.16b, v26.16b 6157 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 6158 rev32 v22.16b, v30.16b //CTR block 8k+17 6159 6160 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 6161 aese v4.16b, v28.16b 6162 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 6163.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6164 6165 aese v5.16b, v26.16b 6166 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 6167 ldr q28, [x8, #224] //load rk14 6168 aese v7.16b, v27.16b //AES block 8k+15 - round 13 6169 6170 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext 6171 aese v2.16b, v26.16b 6172 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 6173 aese v4.16b, v26.16b 6174 aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 6175 6176.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6177 aese v1.16b, v26.16b 6178 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 6179 ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext 6180 6181 ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext 6182 aese v2.16b, v27.16b //AES block 8k+10 - round 13 6183 aese v4.16b, v27.16b //AES block 8k+12 - round 13 6184 6185 rev32 v23.16b, v30.16b //CTR block 8k+18 6186 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 6187 aese v5.16b, v27.16b //AES block 8k+13 - round 13 6188 6189 aese v0.16b, v26.16b 6190 aesmc v0.16b, v0.16b //AES block 
8k+8 - round 12 6191 aese v3.16b, v27.16b //AES block 8k+11 - round 13 6192 cmp x0, x5 //.LOOP CONTROL 6193 6194.inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result 6195 rev32 v25.16b, v30.16b //CTR block 8k+19 6196 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 6197 6198 aese v0.16b, v27.16b //AES block 8k+8 - round 13 6199 aese v6.16b, v27.16b //AES block 8k+14 - round 13 6200.inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 6201 6202 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6203 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6204 aese v1.16b, v27.16b //AES block 8k+9 - round 13 6205 6206.inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 6207 rev32 v4.16b, v30.16b //CTR block 8k+20 6208.inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result 6209 6210 mov v3.16b, v25.16b //CTR block 8k+19 6211.inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result 6212.inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result 6213 6214 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 6215 stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result 6216 mov v2.16b, v23.16b //CTR block 8k+18 6217 6218.inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 6219.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 6220 stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result 6221 6222.inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 6223 mov v1.16b, v22.16b //CTR block 8k+17 6224 stp q12, q13, [x2], #32 //AES block 4, 5 - store result 6225 6226 stp q14, q15, [x2], #32 //AES block 6, 7 - store result 6227 mov v0.16b, v20.16b //CTR block 8k+16 6228 b.lt .L256_enc_main_loop 6229 6230.L256_enc_prepretail: //PREPRETAIL 6231 rev32 v5.16b, v30.16b //CTR block 8k+13 6232 ldp q26, q27, [x8, #0] //load 
rk0, rk1 6233 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 6234 6235 rev64 v10.16b, v10.16b //GHASH block 8k+2 6236 6237 rev32 v6.16b, v30.16b //CTR block 8k+14 6238 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 6239 6240 rev64 v13.16b, v13.16b //GHASH block 8k+5 6241 ldr q21, [x3, #144] //load h6k | h5k 6242 ldr q24, [x3, #192] //load h8k | h7k 6243 6244 rev32 v7.16b, v30.16b //CTR block 8k+15 6245 6246 aese v6.16b, v26.16b 6247 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 6248 aese v4.16b, v26.16b 6249 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 6250 aese v1.16b, v26.16b 6251 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 6252 6253 aese v5.16b, v26.16b 6254 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 6255 aese v0.16b, v26.16b 6256 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 6257 6258 aese v2.16b, v26.16b 6259 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 6260 aese v7.16b, v26.16b 6261 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 6262 aese v3.16b, v26.16b 6263 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 6264 6265 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 6266 rev64 v8.16b, v8.16b //GHASH block 8k 6267 aese v1.16b, v27.16b 6268 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 6269 6270 rev64 v9.16b, v9.16b //GHASH block 8k+1 6271 ldp q28, q26, [x8, #32] //load rk2, rk3 6272 aese v3.16b, v27.16b 6273 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 6274 6275 ldr q23, [x3, #176] //load h7l | h7h 6276 ext v23.16b, v23.16b, v23.16b, #8 6277 ldr q25, [x3, #208] //load h8l | h8h 6278 ext v25.16b, v25.16b, v25.16b, #8 6279 aese v2.16b, v27.16b 6280 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 6281 6282 ldr q20, [x3, #128] //load h5l | h5h 6283 ext v20.16b, v20.16b, v20.16b, #8 6284 ldr q22, [x3, #160] //load h6l | h6h 6285 ext v22.16b, v22.16b, v22.16b, #8 6286 aese v0.16b, v27.16b 6287 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 6288 aese v5.16b, v27.16b 6289 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 6290 6291 aese v4.16b, 
v27.16b 6292 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 6293 eor v8.16b, v8.16b, v19.16b //PRE 1 6294 6295 rev64 v11.16b, v11.16b //GHASH block 8k+3 6296 aese v6.16b, v27.16b 6297 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 6298 6299 aese v1.16b, v28.16b 6300 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 6301 aese v2.16b, v28.16b 6302 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 6303 aese v7.16b, v27.16b 6304 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 6305 6306 aese v4.16b, v28.16b 6307 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 6308 aese v0.16b, v28.16b 6309 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 6310 aese v6.16b, v28.16b 6311 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 6312 6313 aese v5.16b, v28.16b 6314 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 6315 aese v7.16b, v28.16b 6316 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 6317 aese v3.16b, v28.16b 6318 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 6319 6320 ldp q27, q28, [x8, #64] //load rk4, rk5 6321 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 6322 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 6323 6324 rev64 v14.16b, v14.16b //GHASH block 8k+6 6325 aese v4.16b, v26.16b 6326 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 6327 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 6328 6329 aese v7.16b, v26.16b 6330 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 6331 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 6332 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 6333 6334 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 6335 aese v6.16b, v26.16b 6336 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 6337 6338 aese v2.16b, v26.16b 6339 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 6340 aese v3.16b, v26.16b 6341 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 6342 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 6343 6344 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 6345 pmull2 v9.1q, v11.2d, v20.2d 
//GHASH block 8k+3 - high 6346 aese v1.16b, v26.16b 6347 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 6348 6349 aese v0.16b, v26.16b 6350 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 6351 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 6352 aese v5.16b, v26.16b 6353 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 6354 6355 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 6356 aese v1.16b, v27.16b 6357 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 6358 aese v6.16b, v27.16b 6359 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 6360 6361 aese v0.16b, v27.16b 6362 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 6363 aese v2.16b, v27.16b 6364 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 6365 aese v4.16b, v27.16b 6366 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 6367 6368 aese v6.16b, v28.16b 6369 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 6370 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 6371.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 6372 6373 aese v7.16b, v27.16b 6374 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 6375 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 6376 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 6377 6378 aese v5.16b, v27.16b 6379 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 6380 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 6381 aese v3.16b, v27.16b 6382 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 6383 6384 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 6385 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 6386 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6387 6388 rev64 v12.16b, v12.16b //GHASH block 8k+4 6389 aese v1.16b, v28.16b 6390 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 6391 aese v0.16b, v28.16b 6392 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 6393 6394 aese v7.16b, v28.16b 6395 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 6396 aese v4.16b, v28.16b 6397 aesmc v4.16b, 
v4.16b //AES block 8k+12 - round 5 6398 ldp q26, q27, [x8, #96] //load rk6, rk7 6399 6400 ldr q23, [x3, #80] //load h3l | h3h 6401 ext v23.16b, v23.16b, v23.16b, #8 6402 ldr q25, [x3, #112] //load h4l | h4h 6403 ext v25.16b, v25.16b, v25.16b, #8 6404 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 6405 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 6406 6407.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 6408 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 6409 6410 aese v5.16b, v28.16b 6411 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 6412 rev64 v15.16b, v15.16b //GHASH block 8k+7 6413 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6414 6415 aese v3.16b, v28.16b 6416 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 6417 aese v2.16b, v28.16b 6418 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 6419.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 6420 6421 aese v7.16b, v26.16b 6422 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 6423 aese v4.16b, v26.16b 6424 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 6425 aese v6.16b, v26.16b 6426 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 6427 6428 ldr q21, [x3, #48] //load h2k | h1k 6429 ldr q24, [x3, #96] //load h4k | h3k 6430 aese v5.16b, v26.16b 6431 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 6432 aese v3.16b, v26.16b 6433 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 6434 6435 aese v0.16b, v26.16b 6436 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 6437 aese v1.16b, v26.16b 6438 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 6439 aese v2.16b, v26.16b 6440 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 6441 6442 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 6443 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 6444 ldr q20, [x3, #32] //load h1l | h1h 6445 ext v20.16b, v20.16b, v20.16b, #8 6446 ldr q22, [x3, #64] //load h2l | h2h 6447 ext v22.16b, v22.16b, v22.16b, #8 
6448 6449 ldp q28, q26, [x8, #128] //load rk8, rk9 6450 aese v1.16b, v27.16b 6451 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 6452 aese v4.16b, v27.16b 6453 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 6454 6455 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 6456 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 6457 6458 aese v5.16b, v27.16b 6459 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 6460 aese v6.16b, v27.16b 6461 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 6462 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 6463 6464 aese v7.16b, v27.16b 6465 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 6466 aese v3.16b, v27.16b 6467 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 6468 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6469 6470 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 6471 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 6472 aese v2.16b, v27.16b 6473 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 6474 6475 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6476 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 6477 aese v0.16b, v27.16b 6478 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 6479 6480 aese v7.16b, v28.16b 6481 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 6482.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 6483 aese v2.16b, v28.16b 6484 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 6485 6486 aese v6.16b, v28.16b 6487 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 6488 aese v4.16b, v28.16b 6489 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 6490 aese v3.16b, v28.16b 6491 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 6492 6493 aese v5.16b, v28.16b 6494 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 6495 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6496 aese v0.16b, v28.16b 6497 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 6498 6499 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 
8k+4 - mid 6500 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 6501 aese v1.16b, v28.16b 6502 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 6503 6504 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 6505 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 6506 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 6507 6508 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 6509.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 6510.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 6511 6512 ldp q27, q28, [x8, #160] //load rk10, rk11 6513 aese v1.16b, v26.16b 6514 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 6515 aese v0.16b, v26.16b 6516 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 6517 6518.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 6519.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 6520 ldr d16, [x10] //MODULO - load modulo constant 6521 6522.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 6523 6524 aese v3.16b, v26.16b 6525 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 6526 aese v7.16b, v26.16b 6527 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 6528 aese v5.16b, v26.16b 6529 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 6530 6531 aese v2.16b, v26.16b 6532 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 6533 aese v6.16b, v26.16b 6534 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 6535 6536 aese v5.16b, v27.16b 6537 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 6538 aese v1.16b, v27.16b 6539 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 6540 aese v4.16b, v26.16b 6541 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 6542 6543 aese v7.16b, v27.16b 6544 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 6545 aese v6.16b, v27.16b 6546 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 6547 aese v3.16b, v27.16b 6548 
aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 6549 6550 aese v4.16b, v27.16b 6551 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 6552 aese v0.16b, v27.16b 6553 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 6554 aese v2.16b, v27.16b 6555 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 6556 6557 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6558.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6559 aese v7.16b, v28.16b 6560 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 6561 6562 ldp q26, q27, [x8, #192] //load rk12, rk13 6563 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6564 aese v2.16b, v28.16b 6565 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 6566 6567.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 6568 aese v1.16b, v28.16b 6569 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 6570 aese v6.16b, v28.16b 6571 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 6572 6573 aese v0.16b, v28.16b 6574 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 6575 aese v4.16b, v28.16b 6576 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 6577 aese v5.16b, v28.16b 6578 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 6579 6580 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6581 aese v3.16b, v28.16b 6582 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 6583 ldr q28, [x8, #224] //load rk14 6584 6585 aese v1.16b, v26.16b 6586 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 6587 aese v2.16b, v26.16b 6588 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 6589 aese v0.16b, v26.16b 6590 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 6591 6592 aese v6.16b, v26.16b 6593 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 6594 aese v5.16b, v26.16b 6595 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 6596 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6597 6598 aese v4.16b, v26.16b 6599 aesmc v4.16b, v4.16b //AES block 8k+12 - 
round 12 6600 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 6601 6602 aese v3.16b, v26.16b 6603 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 6604 aese v7.16b, v26.16b 6605 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 6606 aese v0.16b, v27.16b //AES block 8k+8 - round 13 6607 6608.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 6609 aese v5.16b, v27.16b //AES block 8k+13 - round 13 6610 aese v1.16b, v27.16b //AES block 8k+9 - round 13 6611 6612 aese v3.16b, v27.16b //AES block 8k+11 - round 13 6613 aese v4.16b, v27.16b //AES block 8k+12 - round 13 6614 aese v7.16b, v27.16b //AES block 8k+15 - round 13 6615 6616 aese v2.16b, v27.16b //AES block 8k+10 - round 13 6617 aese v6.16b, v27.16b //AES block 8k+14 - round 13 6618.L256_enc_tail: //TAIL 6619 6620 ldp q24, q25, [x3, #192] //load h8l | h8h 6621 ext v25.16b, v25.16b, v25.16b, #8 6622 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 6623 6624 ldr q8, [x0], #16 //AES block 8k+8 - load plaintext 6625 6626 ldp q20, q21, [x3, #128] //load h5l | h5h 6627 ext v20.16b, v20.16b, v20.16b, #8 6628 6629 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 6630 ldp q22, q23, [x3, #160] //load h6l | h6h 6631 ext v22.16b, v22.16b, v22.16b, #8 6632 ext v23.16b, v23.16b, v23.16b, #8 6633 mov v29.16b, v28.16b 6634 6635 cmp x5, #112 6636.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result 6637 b.gt .L256_enc_blocks_more_than_7 6638 6639 movi v19.8b, #0 6640 mov v7.16b, v6.16b 6641 movi v17.8b, #0 6642 6643 mov v6.16b, v5.16b 6644 mov v5.16b, v4.16b 6645 mov v4.16b, v3.16b 6646 6647 mov v3.16b, v2.16b 6648 sub v30.4s, v30.4s, v31.4s 6649 mov v2.16b, v1.16b 6650 6651 movi v18.8b, #0 6652 cmp x5, #96 6653 b.gt .L256_enc_blocks_more_than_6 6654 6655 mov v7.16b, v6.16b 6656 mov v6.16b, v5.16b 6657 cmp x5, #80 6658 6659 mov v5.16b, v4.16b 6660 mov v4.16b, v3.16b 6661 mov v3.16b, v1.16b 6662 6663 sub v30.4s, v30.4s, v31.4s 6664 b.gt 
.L256_enc_blocks_more_than_5 6665 6666 mov v7.16b, v6.16b 6667 sub v30.4s, v30.4s, v31.4s 6668 6669 mov v6.16b, v5.16b 6670 mov v5.16b, v4.16b 6671 6672 cmp x5, #64 6673 mov v4.16b, v1.16b 6674 b.gt .L256_enc_blocks_more_than_4 6675 6676 cmp x5, #48 6677 mov v7.16b, v6.16b 6678 mov v6.16b, v5.16b 6679 6680 mov v5.16b, v1.16b 6681 sub v30.4s, v30.4s, v31.4s 6682 b.gt .L256_enc_blocks_more_than_3 6683 6684 cmp x5, #32 6685 mov v7.16b, v6.16b 6686 ldr q24, [x3, #96] //load h4k | h3k 6687 6688 mov v6.16b, v1.16b 6689 sub v30.4s, v30.4s, v31.4s 6690 b.gt .L256_enc_blocks_more_than_2 6691 6692 mov v7.16b, v1.16b 6693 6694 sub v30.4s, v30.4s, v31.4s 6695 cmp x5, #16 6696 b.gt .L256_enc_blocks_more_than_1 6697 6698 sub v30.4s, v30.4s, v31.4s 6699 ldr q21, [x3, #48] //load h2k | h1k 6700 b .L256_enc_blocks_less_than_1 6701.L256_enc_blocks_more_than_7: //blocks left > 7 6702 st1 { v9.16b}, [x2], #16 //AES final-7 block - store result 6703 6704 rev64 v8.16b, v9.16b //GHASH final-7 block 6705 6706 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6707 6708 ldr q9, [x0], #16 //AES final-6 block - load plaintext 6709 6710 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 6711 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 6712 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 6713 6714 movi v16.8b, #0 //suppress further partial tag feed in 6715 6716 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 6717.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 6718 6719 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 6720 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 6721.L256_enc_blocks_more_than_6: //blocks left > 6 6722 6723 st1 { v9.16b}, [x2], #16 //AES final-6 block - store result 6724 6725 rev64 v8.16b, v9.16b //GHASH final-6 block 6726 6727 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6728 6729 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 6730 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 
6731 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 6732 6733 ldr q9, [x0], #16 //AES final-5 block - load plaintext 6734 6735 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 6736 6737 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 6738 6739 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 6740.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 6741 6742 movi v16.8b, #0 //suppress further partial tag feed in 6743 6744 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 6745 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 6746.L256_enc_blocks_more_than_5: //blocks left > 5 6747 6748 st1 { v9.16b}, [x2], #16 //AES final-5 block - store result 6749 6750 rev64 v8.16b, v9.16b //GHASH final-5 block 6751 6752 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6753 6754 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 6755 6756 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 6757 6758 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 6759 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 6760 6761 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 6762 6763 ldr q9, [x0], #16 //AES final-4 block - load plaintext 6764 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 6765 6766 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 6767 movi v16.8b, #0 //suppress further partial tag feed in 6768 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 6769 6770 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 6771.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 6772.L256_enc_blocks_more_than_4: //blocks left > 4 6773 6774 st1 { v9.16b}, [x2], #16 //AES final-4 block - store result 6775 6776 rev64 v8.16b, v9.16b //GHASH final-4 block 6777 6778 ldr q9, [x0], #16 //AES final-3 block - load plaintext 6779 6780 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6781 6782 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 
6783 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 6784 6785.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 6786 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 6787 6788 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 6789 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 6790 6791 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 6792 6793 movi v16.8b, #0 //suppress further partial tag feed in 6794 6795 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 6796 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 6797.L256_enc_blocks_more_than_3: //blocks left > 3 6798 6799 st1 { v9.16b}, [x2], #16 //AES final-3 block - store result 6800 6801 ldr q25, [x3, #112] //load h4l | h4h 6802 ext v25.16b, v25.16b, v25.16b, #8 6803 rev64 v8.16b, v9.16b //GHASH final-3 block 6804 6805 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6806 6807 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 6808 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 6809 6810 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 6811 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 6812 ldr q24, [x3, #96] //load h4k | h3k 6813 6814 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 6815 ldr q9, [x0], #16 //AES final-2 block - load plaintext 6816 6817 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 6818 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 6819 6820.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 6821 movi v16.8b, #0 //suppress further partial tag feed in 6822 6823 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 6824 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 6825.L256_enc_blocks_more_than_2: //blocks left > 2 6826 6827 ldr q23, [x3, #80] //load h3l | h3h 6828 ext v23.16b, v23.16b, v23.16b, #8 6829 6830 st1 { v9.16b}, [x2], #16 //AES final-2 block - store result 6831 6832 rev64 v8.16b, v9.16b //GHASH 
final-2 block 6833 ldr q9, [x0], #16 //AES final-1 block - load plaintext 6834 6835 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6836 6837 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 6838 6839 movi v16.8b, #0 //suppress further partial tag feed in 6840 6841 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 6842.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 6843 6844 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 6845 6846 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 6847 6848 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 6849 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 6850 6851 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 6852 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 6853.L256_enc_blocks_more_than_1: //blocks left > 1 6854 6855 st1 { v9.16b}, [x2], #16 //AES final-1 block - store result 6856 6857 ldr q22, [x3, #64] //load h2l | h2h 6858 ext v22.16b, v22.16b, v22.16b, #8 6859 rev64 v8.16b, v9.16b //GHASH final-1 block 6860 ldr q9, [x0], #16 //AES final block - load plaintext 6861 6862 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6863 movi v16.8b, #0 //suppress further partial tag feed in 6864 6865 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 6866 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 6867 6868.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result 6869 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 6870 6871 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 6872 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 6873 6874 ldr q21, [x3, #48] //load h2k | h1k 6875 6876 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 6877 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 6878 6879 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 6880 6881 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 6882.L256_enc_blocks_less_than_1: //blocks left 
<= 1 6883 6884 and x1, x1, #127 //bit_length %= 128 6885 6886 sub x1, x1, #128 //bit_length -= 128 6887 6888 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 6889 6890 mvn x6, xzr //temp0_x = 0xffffffffffffffff 6891 and x1, x1, #127 //bit_length %= 128 6892 6893 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 6894 cmp x1, #64 6895 mvn x7, xzr //temp1_x = 0xffffffffffffffff 6896 6897 csel x14, x6, xzr, lt 6898 csel x13, x7, x6, lt 6899 6900 mov v0.d[0], x13 //ctr0b is mask for last block 6901 ldr q20, [x3, #32] //load h1l | h1h 6902 ext v20.16b, v20.16b, v20.16b, #8 6903 6904 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 6905 mov v0.d[1], x14 6906 6907 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 6908 6909 rev64 v8.16b, v9.16b //GHASH final block 6910 6911 rev32 v30.16b, v30.16b 6912 bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 6913 str q30, [x16] //store the updated counter 6914 6915 eor v8.16b, v8.16b, v16.16b //feed in partial tag 6916 st1 { v9.16b}, [x2] //store all 16B 6917 6918 ins v16.d[0], v8.d[1] //GHASH final block - mid 6919 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 6920 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 6921 6922 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 6923 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 6924 6925 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 6926 6927 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 6928 6929 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 6930 ldr d16, [x10] //MODULO - load modulo constant 6931 6932 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 6933 6934.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 6935 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 6936 6937.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, 
v21.16b //MODULO - fold into mid 6938 6939 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 6940 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 6941 6942.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low 6943 ext v19.16b, v19.16b, v19.16b, #8 6944 rev64 v19.16b, v19.16b 6945 st1 { v19.16b }, [x3] 6946 mov x0, x9 //return sizes 6947 6948 ldp d10, d11, [sp, #16] 6949 ldp d12, d13, [sp, #32] 6950 ldp d14, d15, [sp, #48] 6951 ldp d8, d9, [sp], #80 6952 ret 6953 6954.L256_enc_ret: 6955 mov w0, #0x0 6956 ret 6957.size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel 6958.globl unroll8_eor3_aes_gcm_dec_256_kernel 6959.type unroll8_eor3_aes_gcm_dec_256_kernel,%function 6960.align 4 6961unroll8_eor3_aes_gcm_dec_256_kernel: 6962 AARCH64_VALID_CALL_TARGET 6963 cbz x1, .L256_dec_ret 6964 stp d8, d9, [sp, #-80]! 6965 lsr x9, x1, #3 6966 mov x16, x4 6967 mov x8, x5 6968 stp d10, d11, [sp, #16] 6969 stp d12, d13, [sp, #32] 6970 stp d14, d15, [sp, #48] 6971 mov x5, #0xc200000000000000 6972 stp x5, xzr, [sp, #64] 6973 add x10, sp, #64 6974 6975 ld1 { v0.16b}, [x16] //CTR block 0 6976 6977 mov x15, #0x100000000 //set up counter increment 6978 movi v31.16b, #0x0 6979 mov v31.d[1], x15 6980 mov x5, x9 6981 6982 sub x5, x5, #1 //byte_len - 1 6983 6984 rev32 v30.16b, v0.16b //set up reversed counter 6985 6986 add v30.4s, v30.4s, v31.4s //CTR block 0 6987 6988 rev32 v1.16b, v30.16b //CTR block 1 6989 add v30.4s, v30.4s, v31.4s //CTR block 1 6990 6991 rev32 v2.16b, v30.16b //CTR block 2 6992 add v30.4s, v30.4s, v31.4s //CTR block 2 6993 ldp q26, q27, [x8, #0] //load rk0, rk1 6994 6995 rev32 v3.16b, v30.16b //CTR block 3 6996 add v30.4s, v30.4s, v31.4s //CTR block 3 6997 6998 rev32 v4.16b, v30.16b //CTR block 4 6999 add v30.4s, v30.4s, v31.4s //CTR block 4 7000 7001 aese v0.16b, v26.16b 7002 aesmc v0.16b, v0.16b //AES block 0 - round 0 7003 7004 rev32 v5.16b, v30.16b //CTR block 5 7005 add v30.4s, 
v30.4s, v31.4s //CTR block 5 7006 7007 aese v1.16b, v26.16b 7008 aesmc v1.16b, v1.16b //AES block 1 - round 0 7009 aese v2.16b, v26.16b 7010 aesmc v2.16b, v2.16b //AES block 2 - round 0 7011 7012 rev32 v6.16b, v30.16b //CTR block 6 7013 add v30.4s, v30.4s, v31.4s //CTR block 6 7014 7015 rev32 v7.16b, v30.16b //CTR block 7 7016 aese v4.16b, v26.16b 7017 aesmc v4.16b, v4.16b //AES block 4 - round 0 7018 7019 aese v6.16b, v26.16b 7020 aesmc v6.16b, v6.16b //AES block 6 - round 0 7021 aese v5.16b, v26.16b 7022 aesmc v5.16b, v5.16b //AES block 5 - round 0 7023 7024 aese v3.16b, v26.16b 7025 aesmc v3.16b, v3.16b //AES block 3 - round 0 7026 aese v7.16b, v26.16b 7027 aesmc v7.16b, v7.16b //AES block 7 - round 0 7028 ldp q28, q26, [x8, #32] //load rk2, rk3 7029 7030 aese v6.16b, v27.16b 7031 aesmc v6.16b, v6.16b //AES block 6 - round 1 7032 aese v4.16b, v27.16b 7033 aesmc v4.16b, v4.16b //AES block 4 - round 1 7034 aese v0.16b, v27.16b 7035 aesmc v0.16b, v0.16b //AES block 0 - round 1 7036 7037 aese v5.16b, v27.16b 7038 aesmc v5.16b, v5.16b //AES block 5 - round 1 7039 aese v7.16b, v27.16b 7040 aesmc v7.16b, v7.16b //AES block 7 - round 1 7041 aese v1.16b, v27.16b 7042 aesmc v1.16b, v1.16b //AES block 1 - round 1 7043 7044 aese v2.16b, v27.16b 7045 aesmc v2.16b, v2.16b //AES block 2 - round 1 7046 aese v3.16b, v27.16b 7047 aesmc v3.16b, v3.16b //AES block 3 - round 1 7048 7049 aese v3.16b, v28.16b 7050 aesmc v3.16b, v3.16b //AES block 3 - round 2 7051 aese v2.16b, v28.16b 7052 aesmc v2.16b, v2.16b //AES block 2 - round 2 7053 aese v6.16b, v28.16b 7054 aesmc v6.16b, v6.16b //AES block 6 - round 2 7055 7056 aese v1.16b, v28.16b 7057 aesmc v1.16b, v1.16b //AES block 1 - round 2 7058 aese v7.16b, v28.16b 7059 aesmc v7.16b, v7.16b //AES block 7 - round 2 7060 aese v5.16b, v28.16b 7061 aesmc v5.16b, v5.16b //AES block 5 - round 2 7062 7063 aese v0.16b, v28.16b 7064 aesmc v0.16b, v0.16b //AES block 0 - round 2 7065 aese v4.16b, v28.16b 7066 aesmc v4.16b, v4.16b //AES block 4 - 
round 2 7067 ldp q27, q28, [x8, #64] //load rk4, rk5 7068 7069 aese v1.16b, v26.16b 7070 aesmc v1.16b, v1.16b //AES block 1 - round 3 7071 aese v2.16b, v26.16b 7072 aesmc v2.16b, v2.16b //AES block 2 - round 3 7073 7074 aese v3.16b, v26.16b 7075 aesmc v3.16b, v3.16b //AES block 3 - round 3 7076 aese v4.16b, v26.16b 7077 aesmc v4.16b, v4.16b //AES block 4 - round 3 7078 7079 aese v5.16b, v26.16b 7080 aesmc v5.16b, v5.16b //AES block 5 - round 3 7081 aese v7.16b, v26.16b 7082 aesmc v7.16b, v7.16b //AES block 7 - round 3 7083 aese v0.16b, v26.16b 7084 aesmc v0.16b, v0.16b //AES block 0 - round 3 7085 7086 aese v6.16b, v26.16b 7087 aesmc v6.16b, v6.16b //AES block 6 - round 3 7088 7089 aese v7.16b, v27.16b 7090 aesmc v7.16b, v7.16b //AES block 7 - round 4 7091 aese v3.16b, v27.16b 7092 aesmc v3.16b, v3.16b //AES block 3 - round 4 7093 7094 aese v6.16b, v27.16b 7095 aesmc v6.16b, v6.16b //AES block 6 - round 4 7096 aese v2.16b, v27.16b 7097 aesmc v2.16b, v2.16b //AES block 2 - round 4 7098 aese v0.16b, v27.16b 7099 aesmc v0.16b, v0.16b //AES block 0 - round 4 7100 7101 aese v4.16b, v27.16b 7102 aesmc v4.16b, v4.16b //AES block 4 - round 4 7103 aese v1.16b, v27.16b 7104 aesmc v1.16b, v1.16b //AES block 1 - round 4 7105 aese v5.16b, v27.16b 7106 aesmc v5.16b, v5.16b //AES block 5 - round 4 7107 7108 aese v0.16b, v28.16b 7109 aesmc v0.16b, v0.16b //AES block 0 - round 5 7110 aese v6.16b, v28.16b 7111 aesmc v6.16b, v6.16b //AES block 6 - round 5 7112 7113 ldp q26, q27, [x8, #96] //load rk6, rk7 7114 aese v4.16b, v28.16b 7115 aesmc v4.16b, v4.16b //AES block 4 - round 5 7116 aese v7.16b, v28.16b 7117 aesmc v7.16b, v7.16b //AES block 7 - round 5 7118 7119 aese v5.16b, v28.16b 7120 aesmc v5.16b, v5.16b //AES block 5 - round 5 7121 7122 aese v2.16b, v28.16b 7123 aesmc v2.16b, v2.16b //AES block 2 - round 5 7124 aese v3.16b, v28.16b 7125 aesmc v3.16b, v3.16b //AES block 3 - round 5 7126 7127 aese v1.16b, v28.16b 7128 aesmc v1.16b, v1.16b //AES block 1 - round 5 7129 7130 aese 
v4.16b, v26.16b 7131 aesmc v4.16b, v4.16b //AES block 4 - round 6 7132 aese v3.16b, v26.16b 7133 aesmc v3.16b, v3.16b //AES block 3 - round 6 7134 aese v7.16b, v26.16b 7135 aesmc v7.16b, v7.16b //AES block 7 - round 6 7136 7137 aese v6.16b, v26.16b 7138 aesmc v6.16b, v6.16b //AES block 6 - round 6 7139 aese v0.16b, v26.16b 7140 aesmc v0.16b, v0.16b //AES block 0 - round 6 7141 aese v5.16b, v26.16b 7142 aesmc v5.16b, v5.16b //AES block 5 - round 6 7143 7144 aese v2.16b, v26.16b 7145 aesmc v2.16b, v2.16b //AES block 2 - round 6 7146 aese v1.16b, v26.16b 7147 aesmc v1.16b, v1.16b //AES block 1 - round 6 7148 ldp q28, q26, [x8, #128] //load rk8, rk9 7149 7150 aese v5.16b, v27.16b 7151 aesmc v5.16b, v5.16b //AES block 5 - round 7 7152 aese v0.16b, v27.16b 7153 aesmc v0.16b, v0.16b //AES block 0 - round 7 7154 7155 aese v3.16b, v27.16b 7156 aesmc v3.16b, v3.16b //AES block 3 - round 7 7157 aese v2.16b, v27.16b 7158 aesmc v2.16b, v2.16b //AES block 2 - round 7 7159 aese v7.16b, v27.16b 7160 aesmc v7.16b, v7.16b //AES block 7 - round 7 7161 7162 aese v4.16b, v27.16b 7163 aesmc v4.16b, v4.16b //AES block 4 - round 7 7164 aese v1.16b, v27.16b 7165 aesmc v1.16b, v1.16b //AES block 1 - round 7 7166 aese v6.16b, v27.16b 7167 aesmc v6.16b, v6.16b //AES block 6 - round 7 7168 7169 and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 7170 aese v7.16b, v28.16b 7171 aesmc v7.16b, v7.16b //AES block 7 - round 8 7172 aese v5.16b, v28.16b 7173 aesmc v5.16b, v5.16b //AES block 5 - round 8 7174 7175 aese v0.16b, v28.16b 7176 aesmc v0.16b, v0.16b //AES block 0 - round 8 7177 aese v1.16b, v28.16b 7178 aesmc v1.16b, v1.16b //AES block 1 - round 8 7179 aese v2.16b, v28.16b 7180 aesmc v2.16b, v2.16b //AES block 2 - round 8 7181 7182 aese v4.16b, v28.16b 7183 aesmc v4.16b, v4.16b //AES block 4 - round 8 7184 aese v3.16b, v28.16b 7185 aesmc v3.16b, v3.16b //AES block 3 - round 8 7186 aese v6.16b, v28.16b 7187 aesmc v6.16b, 
v6.16b //AES block 6 - round 8 7188 7189 aese v2.16b, v26.16b 7190 aesmc v2.16b, v2.16b //AES block 2 - round 9 7191 7192 ld1 { v19.16b}, [x3] 7193 ext v19.16b, v19.16b, v19.16b, #8 7194 rev64 v19.16b, v19.16b 7195 ldp q27, q28, [x8, #160] //load rk10, rk11 7196 add x4, x0, x1, lsr #3 //end_input_ptr 7197 add x5, x5, x0 7198 7199 aese v3.16b, v26.16b 7200 aesmc v3.16b, v3.16b //AES block 3 - round 9 7201 aese v6.16b, v26.16b 7202 aesmc v6.16b, v6.16b //AES block 6 - round 9 7203 7204 aese v4.16b, v26.16b 7205 aesmc v4.16b, v4.16b //AES block 4 - round 9 7206 aese v5.16b, v26.16b 7207 aesmc v5.16b, v5.16b //AES block 5 - round 9 7208 7209 aese v7.16b, v26.16b 7210 aesmc v7.16b, v7.16b //AES block 7 - round 9 7211 7212 aese v0.16b, v26.16b 7213 aesmc v0.16b, v0.16b //AES block 0 - round 9 7214 aese v1.16b, v26.16b 7215 aesmc v1.16b, v1.16b //AES block 1 - round 9 7216 7217 aese v4.16b, v27.16b 7218 aesmc v4.16b, v4.16b //AES block 4 - round 10 7219 aese v7.16b, v27.16b 7220 aesmc v7.16b, v7.16b //AES block 7 - round 10 7221 aese v5.16b, v27.16b 7222 aesmc v5.16b, v5.16b //AES block 5 - round 10 7223 7224 aese v1.16b, v27.16b 7225 aesmc v1.16b, v1.16b //AES block 1 - round 10 7226 aese v2.16b, v27.16b 7227 aesmc v2.16b, v2.16b //AES block 2 - round 10 7228 aese v0.16b, v27.16b 7229 aesmc v0.16b, v0.16b //AES block 0 - round 10 7230 7231 aese v6.16b, v27.16b 7232 aesmc v6.16b, v6.16b //AES block 6 - round 10 7233 aese v3.16b, v27.16b 7234 aesmc v3.16b, v3.16b //AES block 3 - round 10 7235 ldp q26, q27, [x8, #192] //load rk12, rk13 7236 7237 aese v0.16b, v28.16b 7238 aesmc v0.16b, v0.16b //AES block 0 - round 11 7239 add v30.4s, v30.4s, v31.4s //CTR block 7 7240 7241 aese v7.16b, v28.16b 7242 aesmc v7.16b, v7.16b //AES block 7 - round 11 7243 aese v3.16b, v28.16b 7244 aesmc v3.16b, v3.16b //AES block 3 - round 11 7245 aese v1.16b, v28.16b 7246 aesmc v1.16b, v1.16b //AES block 1 - round 11 7247 7248 aese v5.16b, v28.16b 7249 aesmc v5.16b, v5.16b //AES block 5 - round 11 
7250 aese v4.16b, v28.16b 7251 aesmc v4.16b, v4.16b //AES block 4 - round 11 7252 aese v2.16b, v28.16b 7253 aesmc v2.16b, v2.16b //AES block 2 - round 11 7254 7255 aese v6.16b, v28.16b 7256 aesmc v6.16b, v6.16b //AES block 6 - round 11 7257 ldr q28, [x8, #224] //load rk14 7258 7259 aese v1.16b, v26.16b 7260 aesmc v1.16b, v1.16b //AES block 1 - round 12 7261 aese v4.16b, v26.16b 7262 aesmc v4.16b, v4.16b //AES block 4 - round 12 7263 aese v5.16b, v26.16b 7264 aesmc v5.16b, v5.16b //AES block 5 - round 12 7265 7266 cmp x0, x5 //check if we have <= 8 blocks 7267 aese v3.16b, v26.16b 7268 aesmc v3.16b, v3.16b //AES block 3 - round 12 7269 aese v2.16b, v26.16b 7270 aesmc v2.16b, v2.16b //AES block 2 - round 12 7271 7272 aese v6.16b, v26.16b 7273 aesmc v6.16b, v6.16b //AES block 6 - round 12 7274 aese v0.16b, v26.16b 7275 aesmc v0.16b, v0.16b //AES block 0 - round 12 7276 aese v7.16b, v26.16b 7277 aesmc v7.16b, v7.16b //AES block 7 - round 12 7278 7279 aese v5.16b, v27.16b //AES block 5 - round 13 7280 aese v1.16b, v27.16b //AES block 1 - round 13 7281 aese v2.16b, v27.16b //AES block 2 - round 13 7282 7283 aese v0.16b, v27.16b //AES block 0 - round 13 7284 aese v4.16b, v27.16b //AES block 4 - round 13 7285 aese v6.16b, v27.16b //AES block 6 - round 13 7286 7287 aese v3.16b, v27.16b //AES block 3 - round 13 7288 aese v7.16b, v27.16b //AES block 7 - round 13 7289 b.ge .L256_dec_tail //handle tail 7290 7291 ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext 7292 7293 ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext 7294 7295 ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext 7296 7297 ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext 7298 cmp x0, x5 //check if we have <= 8 blocks 7299 7300.inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result 7301.inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result 7302 stp q0, q1, [x2], #32 //AES block 0, 1 - store result 7303 7304 rev32 v0.16b, v30.16b 
//CTR block 8 7305 add v30.4s, v30.4s, v31.4s //CTR block 8 7306.inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result 7307 7308.inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result 7309 7310.inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result 7311 rev32 v1.16b, v30.16b //CTR block 9 7312 add v30.4s, v30.4s, v31.4s //CTR block 9 7313 7314.inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result 7315 stp q2, q3, [x2], #32 //AES block 2, 3 - store result 7316 7317 rev32 v2.16b, v30.16b //CTR block 10 7318 add v30.4s, v30.4s, v31.4s //CTR block 10 7319 7320.inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result 7321 7322 rev32 v3.16b, v30.16b //CTR block 11 7323 add v30.4s, v30.4s, v31.4s //CTR block 11 7324 stp q4, q5, [x2], #32 //AES block 4, 5 - store result 7325 7326.inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result 7327 stp q6, q7, [x2], #32 //AES block 6, 7 - store result 7328 7329 rev32 v4.16b, v30.16b //CTR block 12 7330 add v30.4s, v30.4s, v31.4s //CTR block 12 7331 b.ge .L256_dec_prepretail //do prepretail 7332 7333.L256_dec_main_loop: //main loop start 7334 rev32 v5.16b, v30.16b //CTR block 8k+13 7335 ldp q26, q27, [x8, #0] //load rk0, rk1 7336 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 7337 7338 rev64 v9.16b, v9.16b //GHASH block 8k+1 7339 ldr q23, [x3, #176] //load h7l | h7h 7340 ext v23.16b, v23.16b, v23.16b, #8 7341 ldr q25, [x3, #208] //load h8l | h8h 7342 ext v25.16b, v25.16b, v25.16b, #8 7343 7344 rev32 v6.16b, v30.16b //CTR block 8k+14 7345 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 7346 rev64 v8.16b, v8.16b //GHASH block 8k 7347 7348 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 7349 rev64 v12.16b, v12.16b //GHASH block 8k+4 7350 rev64 v11.16b, v11.16b //GHASH block 8k+3 7351 7352 rev32 v7.16b, v30.16b //CTR block 8k+15 7353 rev64 v15.16b, v15.16b //GHASH block 8k+7 7354 7355 aese v3.16b, v26.16b 
7356 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 7357 aese v6.16b, v26.16b 7358 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 7359 aese v2.16b, v26.16b 7360 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 7361 7362 aese v7.16b, v26.16b 7363 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 7364 aese v0.16b, v26.16b 7365 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 7366 aese v5.16b, v26.16b 7367 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 7368 7369 aese v4.16b, v26.16b 7370 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 7371 aese v1.16b, v26.16b 7372 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 7373 ldp q28, q26, [x8, #32] //load rk2, rk3 7374 7375 eor v8.16b, v8.16b, v19.16b //PRE 1 7376 ldr q20, [x3, #128] //load h5l | h5h 7377 ext v20.16b, v20.16b, v20.16b, #8 7378 ldr q22, [x3, #160] //load h6l | h6h 7379 ext v22.16b, v22.16b, v22.16b, #8 7380 aese v6.16b, v27.16b 7381 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 7382 7383 aese v4.16b, v27.16b 7384 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 7385 rev64 v10.16b, v10.16b //GHASH block 8k+2 7386 aese v3.16b, v27.16b 7387 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 7388 7389 aese v0.16b, v27.16b 7390 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 7391 aese v5.16b, v27.16b 7392 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 7393 aese v2.16b, v27.16b 7394 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 7395 7396 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7397 aese v7.16b, v27.16b 7398 aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 7399 aese v1.16b, v27.16b 7400 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 7401 7402 aese v4.16b, v28.16b 7403 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 7404 aese v0.16b, v28.16b 7405 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 7406 aese v3.16b, v28.16b 7407 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 7408 7409 aese v6.16b, v28.16b 7410 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 7411 aese v7.16b, v28.16b 7412 
aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 7413 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 7414 7415 aese v5.16b, v28.16b 7416 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 7417 aese v2.16b, v28.16b 7418 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 7419 aese v1.16b, v28.16b 7420 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 7421 7422 ldp q27, q28, [x8, #64] //load rk4, rk5 7423 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 7424 aese v3.16b, v26.16b 7425 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 7426 7427 aese v0.16b, v26.16b 7428 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 7429 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 7430 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 7431 7432 aese v5.16b, v26.16b 7433 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 7434 aese v6.16b, v26.16b 7435 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 7436 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 7437 7438 aese v4.16b, v26.16b 7439 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 7440 aese v1.16b, v26.16b 7441 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 7442 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7443 7444 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 7445 aese v2.16b, v26.16b 7446 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 7447 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 7448 7449 aese v5.16b, v27.16b 7450 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 7451 aese v7.16b, v26.16b 7452 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 7453 aese v3.16b, v27.16b 7454 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 7455 7456 aese v2.16b, v27.16b 7457 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 7458 aese v0.16b, v27.16b 7459 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 7460 aese v1.16b, v27.16b 7461 aesmc v1.16b, v1.16b //AES block 8k+9 - round 4 7462 7463 aese v6.16b, v27.16b 7464 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 7465 aese v7.16b, v27.16b 7466 
aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 7467 aese v4.16b, v27.16b 7468 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 7469 7470 ldr q21, [x3, #144] //load h6k | h5k 7471 ldr q24, [x3, #192] //load h8k | h7k 7472 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 7473 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 7474 7475 ldp q26, q27, [x8, #96] //load rk6, rk7 7476 aese v5.16b, v28.16b 7477 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 7478 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 7479 7480 aese v0.16b, v28.16b 7481 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 7482 aese v3.16b, v28.16b 7483 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 7484 aese v7.16b, v28.16b 7485 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 7486 7487 aese v1.16b, v28.16b 7488 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 7489 aese v2.16b, v28.16b 7490 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 7491 aese v6.16b, v28.16b 7492 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 7493 7494.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 7495 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7496 rev64 v13.16b, v13.16b //GHASH block 8k+5 7497 7498 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 7499 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 7500 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7501 7502 aese v3.16b, v26.16b 7503 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 7504 aese v0.16b, v26.16b 7505 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 7506 aese v4.16b, v28.16b 7507 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 7508 7509 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7510 aese v1.16b, v26.16b 7511 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 7512 aese v6.16b, v26.16b 7513 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 7514 7515 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7516 pmull v20.1q, v11.1d, v20.1d //GHASH 
block 8k+3 - low 7517 aese v4.16b, v26.16b 7518 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 7519 7520 aese v2.16b, v26.16b 7521 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 7522 aese v5.16b, v26.16b 7523 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 7524 aese v7.16b, v26.16b 7525 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 7526 7527 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 7528 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 7529.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 7530 7531 ldr q23, [x3, #80] //load h3l | h3h 7532 ext v23.16b, v23.16b, v23.16b, #8 7533 ldr q25, [x3, #112] //load h4l | h4h 7534 ext v25.16b, v25.16b, v25.16b, #8 7535 rev64 v14.16b, v14.16b //GHASH block 8k+6 7536 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 7537 7538 aese v2.16b, v27.16b 7539 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 7540 aese v5.16b, v27.16b 7541 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 7542 ldp q28, q26, [x8, #128] //load rk8, rk9 7543 7544 ldr q20, [x3, #32] //load h1l | h1h 7545 ext v20.16b, v20.16b, v20.16b, #8 7546 ldr q22, [x3, #64] //load h2l | h2h 7547 ext v22.16b, v22.16b, v22.16b, #8 7548.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7549 aese v7.16b, v27.16b 7550 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 7551 7552 aese v1.16b, v27.16b 7553 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 7554 aese v3.16b, v27.16b 7555 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 7556 aese v6.16b, v27.16b 7557 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 7558 7559 ldr q21, [x3, #48] //load h2k | h1k 7560 ldr q24, [x3, #96] //load h4k | h3k 7561 aese v0.16b, v27.16b 7562 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 7563 aese v4.16b, v27.16b 7564 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 7565 7566 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 7567 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 
7568 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7569 7570 aese v5.16b, v28.16b 7571 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 7572 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 7573 aese v2.16b, v28.16b 7574 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 7575 7576 aese v6.16b, v28.16b 7577 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 7578 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 7579 aese v1.16b, v28.16b 7580 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 7581 7582 aese v4.16b, v28.16b 7583 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 7584 aese v0.16b, v28.16b 7585 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 7586 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 7587 7588 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7589 aese v3.16b, v28.16b 7590 aesmc v3.16b, v3.16b //AES block 8k+11 - round 8 7591 aese v7.16b, v28.16b 7592 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 7593 7594 ldp q27, q28, [x8, #160] //load rk10, rk11 7595 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 7596 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7597 7598 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 7599.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 7600 aese v3.16b, v26.16b 7601 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 7602 7603 aese v6.16b, v26.16b 7604 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 7605 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 7606 aese v5.16b, v26.16b 7607 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 7608 7609 ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext 7610 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 7611 aese v7.16b, v26.16b 7612 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 7613 7614 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 7615 aese v2.16b, v26.16b 7616 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 7617 aese v1.16b, v26.16b 7618 
aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 7619 7620 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 7621 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 7622 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 7623 7624 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 7625 aese v3.16b, v27.16b 7626 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 7627 aese v6.16b, v27.16b 7628 aesmc v6.16b, v6.16b //AES block 8k+14 - round 10 7629 7630 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 7631 aese v0.16b, v26.16b 7632 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 7633.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 7634 7635 aese v4.16b, v26.16b 7636 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 7637.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 7638.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 7639 7640 aese v2.16b, v27.16b 7641 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 7642 aese v5.16b, v27.16b 7643 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 7644 aese v7.16b, v27.16b 7645 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 7646 7647 aese v1.16b, v27.16b 7648 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 7649 aese v0.16b, v27.16b 7650 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 7651 aese v4.16b, v27.16b 7652 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 7653 7654.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 7655 rev32 v20.16b, v30.16b //CTR block 8k+16 7656 ldr d16, [x10] //MODULO - load modulo constant 7657 7658 add v30.4s, v30.4s, v31.4s //CTR block 8k+16 7659 aese v1.16b, v28.16b 7660 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 7661 ldp q26, q27, [x8, #192] //load rk12, rk13 7662 7663 aese v0.16b, v28.16b 7664 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 7665 aese v6.16b, v28.16b 7666 aesmc v6.16b, v6.16b //AES block 8k+14 
- round 11 7667 7668.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 7669 rev32 v22.16b, v30.16b //CTR block 8k+17 7670 aese v2.16b, v28.16b 7671 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 7672 7673 ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext 7674 aese v7.16b, v28.16b 7675 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 7676 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 7677 7678 aese v5.16b, v28.16b 7679 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 7680 add v30.4s, v30.4s, v31.4s //CTR block 8k+17 7681 aese v3.16b, v28.16b 7682 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 7683 7684 aese v2.16b, v26.16b 7685 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 7686 aese v7.16b, v26.16b 7687 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 7688 aese v6.16b, v26.16b 7689 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 7690 7691 rev32 v23.16b, v30.16b //CTR block 8k+18 7692 add v30.4s, v30.4s, v31.4s //CTR block 8k+18 7693 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 7694 7695.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 7696 aese v1.16b, v26.16b 7697 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 7698 aese v4.16b, v28.16b 7699 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 7700 7701 ldr q28, [x8, #224] //load rk14 7702 aese v5.16b, v26.16b 7703 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 7704 aese v3.16b, v26.16b 7705 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 7706 7707.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 7708 aese v0.16b, v26.16b 7709 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 7710 aese v4.16b, v26.16b 7711 aesmc v4.16b, v4.16b //AES block 8k+12 - round 12 7712 7713 ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext 7714 aese v1.16b, v27.16b //AES block 8k+9 - round 13 7715 aese v2.16b, v27.16b //AES block 8k+10 - round 
13 7716 7717 ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext 7718 aese v0.16b, v27.16b //AES block 8k+8 - round 13 7719 aese v5.16b, v27.16b //AES block 8k+13 - round 13 7720 7721 rev32 v25.16b, v30.16b //CTR block 8k+19 7722.inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result 7723.inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result 7724 7725 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 7726 aese v7.16b, v27.16b //AES block 8k+15 - round 13 7727 7728 add v30.4s, v30.4s, v31.4s //CTR block 8k+19 7729 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 7730 aese v4.16b, v27.16b //AES block 8k+12 - round 13 7731 7732.inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 8k+13 - result 7733.inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result 7734 aese v3.16b, v27.16b //AES block 8k+11 - round 13 7735 7736 stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result 7737 mov v0.16b, v20.16b //CTR block 8k+16 7738.inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 8k+12 - result 7739 7740.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 7741.inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result 7742 stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result 7743 7744 mov v3.16b, v25.16b //CTR block 8k+19 7745 mov v2.16b, v23.16b //CTR block 8k+18 7746 aese v6.16b, v27.16b //AES block 8k+14 - round 13 7747 7748 mov v1.16b, v22.16b //CTR block 8k+17 7749 stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result 7750.inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 8k+15 - result 7751 7752.inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 8k+14 - result 7753 rev32 v4.16b, v30.16b //CTR block 8k+20 7754 add v30.4s, v30.4s, v31.4s //CTR block 8k+20 7755 7756 cmp x0, x5 //.LOOP CONTROL 7757 stp q6, q7, 
[x2], #32 //AES block 8k+14, 8k+15 - store result 7758 b.lt .L256_dec_main_loop 7759 7760.L256_dec_prepretail: //PREPRETAIL 7761 ldp q26, q27, [x8, #0] //load rk0, rk1 7762 rev32 v5.16b, v30.16b //CTR block 8k+13 7763 add v30.4s, v30.4s, v31.4s //CTR block 8k+13 7764 7765 rev64 v12.16b, v12.16b //GHASH block 8k+4 7766 ldr q21, [x3, #144] //load h6k | h5k 7767 ldr q24, [x3, #192] //load h8k | h7k 7768 7769 rev32 v6.16b, v30.16b //CTR block 8k+14 7770 rev64 v8.16b, v8.16b //GHASH block 8k 7771 add v30.4s, v30.4s, v31.4s //CTR block 8k+14 7772 7773 ext v19.16b, v19.16b, v19.16b, #8 //PRE 0 7774 ldr q23, [x3, #176] //load h7l | h7h 7775 ext v23.16b, v23.16b, v23.16b, #8 7776 ldr q25, [x3, #208] //load h8l | h8h 7777 ext v25.16b, v25.16b, v25.16b, #8 7778 rev64 v9.16b, v9.16b //GHASH block 8k+1 7779 7780 rev32 v7.16b, v30.16b //CTR block 8k+15 7781 rev64 v10.16b, v10.16b //GHASH block 8k+2 7782 ldr q20, [x3, #128] //load h5l | h5h 7783 ext v20.16b, v20.16b, v20.16b, #8 7784 ldr q22, [x3, #160] //load h6l | h6h 7785 ext v22.16b, v22.16b, v22.16b, #8 7786 7787 aese v0.16b, v26.16b 7788 aesmc v0.16b, v0.16b //AES block 8k+8 - round 0 7789 aese v1.16b, v26.16b 7790 aesmc v1.16b, v1.16b //AES block 8k+9 - round 0 7791 aese v4.16b, v26.16b 7792 aesmc v4.16b, v4.16b //AES block 8k+12 - round 0 7793 7794 aese v3.16b, v26.16b 7795 aesmc v3.16b, v3.16b //AES block 8k+11 - round 0 7796 aese v5.16b, v26.16b 7797 aesmc v5.16b, v5.16b //AES block 8k+13 - round 0 7798 aese v6.16b, v26.16b 7799 aesmc v6.16b, v6.16b //AES block 8k+14 - round 0 7800 7801 aese v4.16b, v27.16b 7802 aesmc v4.16b, v4.16b //AES block 8k+12 - round 1 7803 aese v7.16b, v26.16b 7804 aesmc v7.16b, v7.16b //AES block 8k+15 - round 0 7805 aese v2.16b, v26.16b 7806 aesmc v2.16b, v2.16b //AES block 8k+10 - round 0 7807 7808 ldp q28, q26, [x8, #32] //load rk2, rk3 7809 aese v0.16b, v27.16b 7810 aesmc v0.16b, v0.16b //AES block 8k+8 - round 1 7811 eor v8.16b, v8.16b, v19.16b //PRE 1 7812 7813 aese v7.16b, v27.16b 7814 
aesmc v7.16b, v7.16b //AES block 8k+15 - round 1 7815 aese v6.16b, v27.16b 7816 aesmc v6.16b, v6.16b //AES block 8k+14 - round 1 7817 aese v2.16b, v27.16b 7818 aesmc v2.16b, v2.16b //AES block 8k+10 - round 1 7819 7820 aese v3.16b, v27.16b 7821 aesmc v3.16b, v3.16b //AES block 8k+11 - round 1 7822 aese v1.16b, v27.16b 7823 aesmc v1.16b, v1.16b //AES block 8k+9 - round 1 7824 aese v5.16b, v27.16b 7825 aesmc v5.16b, v5.16b //AES block 8k+13 - round 1 7826 7827 pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high 7828 trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7829 pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low 7830 7831 rev64 v11.16b, v11.16b //GHASH block 8k+3 7832 pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low 7833 7834 aese v5.16b, v28.16b 7835 aesmc v5.16b, v5.16b //AES block 8k+13 - round 2 7836 aese v7.16b, v28.16b 7837 aesmc v7.16b, v7.16b //AES block 8k+15 - round 2 7838 aese v1.16b, v28.16b 7839 aesmc v1.16b, v1.16b //AES block 8k+9 - round 2 7840 7841 aese v3.16b, v28.16b 7842 aesmc v3.16b, v3.16b //AES block 8k+11 - round 2 7843 aese v6.16b, v28.16b 7844 aesmc v6.16b, v6.16b //AES block 8k+14 - round 2 7845 pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high 7846 7847 aese v0.16b, v28.16b 7848 aesmc v0.16b, v0.16b //AES block 8k+8 - round 2 7849 aese v7.16b, v26.16b 7850 aesmc v7.16b, v7.16b //AES block 8k+15 - round 3 7851 7852 aese v5.16b, v26.16b 7853 aesmc v5.16b, v5.16b //AES block 8k+13 - round 3 7854 rev64 v14.16b, v14.16b //GHASH block 8k+6 7855 7856 aese v0.16b, v26.16b 7857 aesmc v0.16b, v0.16b //AES block 8k+8 - round 3 7858 aese v2.16b, v28.16b 7859 aesmc v2.16b, v2.16b //AES block 8k+10 - round 2 7860 aese v6.16b, v26.16b 7861 aesmc v6.16b, v6.16b //AES block 8k+14 - round 3 7862 7863 pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high 7864 trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid 7865 aese v4.16b, v28.16b 7866 aesmc v4.16b, v4.16b //AES block 8k+12 - round 2 7867 7868 ldp q27, q28, [x8, #64] //load 
rk4, rk5 7869 aese v1.16b, v26.16b 7870 aesmc v1.16b, v1.16b //AES block 8k+9 - round 3 7871 pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high 7872 7873 aese v2.16b, v26.16b 7874 aesmc v2.16b, v2.16b //AES block 8k+10 - round 3 7875 eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high 7876 eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid 7877 7878 aese v4.16b, v26.16b 7879 aesmc v4.16b, v4.16b //AES block 8k+12 - round 3 7880 pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low 7881 aese v3.16b, v26.16b 7882 aesmc v3.16b, v3.16b //AES block 8k+11 - round 3 7883 7884.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high 7885 trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7886 trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid 7887 7888 pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid 7889 pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low 7890 eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low 7891 7892 pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid 7893 aese v5.16b, v27.16b 7894 aesmc v5.16b, v5.16b //AES block 8k+13 - round 4 7895 aese v0.16b, v27.16b 7896 aesmc v0.16b, v0.16b //AES block 8k+8 - round 4 7897 7898.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low 7899 ldr q20, [x3, #32] //load h1l | h1h 7900 ext v20.16b, v20.16b, v20.16b, #8 7901 ldr q22, [x3, #64] //load h2l | h2h 7902 ext v22.16b, v22.16b, v22.16b, #8 7903 aese v7.16b, v27.16b 7904 aesmc v7.16b, v7.16b //AES block 8k+15 - round 4 7905 7906 aese v2.16b, v27.16b 7907 aesmc v2.16b, v2.16b //AES block 8k+10 - round 4 7908 aese v6.16b, v27.16b 7909 aesmc v6.16b, v6.16b //AES block 8k+14 - round 4 7910 eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid 7911 7912 eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7913 aese v7.16b, v28.16b 7914 aesmc v7.16b, v7.16b //AES block 8k+15 - round 5 7915 aese v1.16b, v27.16b 7916 aesmc v1.16b, v1.16b //AES block 8k+9 - 
round 4 7917 7918 aese v2.16b, v28.16b 7919 aesmc v2.16b, v2.16b //AES block 8k+10 - round 5 7920 aese v3.16b, v27.16b 7921 aesmc v3.16b, v3.16b //AES block 8k+11 - round 4 7922 aese v4.16b, v27.16b 7923 aesmc v4.16b, v4.16b //AES block 8k+12 - round 4 7924 7925 aese v1.16b, v28.16b 7926 aesmc v1.16b, v1.16b //AES block 8k+9 - round 5 7927 pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid 7928 aese v6.16b, v28.16b 7929 aesmc v6.16b, v6.16b //AES block 8k+14 - round 5 7930 7931 aese v4.16b, v28.16b 7932 aesmc v4.16b, v4.16b //AES block 8k+12 - round 5 7933 aese v3.16b, v28.16b 7934 aesmc v3.16b, v3.16b //AES block 8k+11 - round 5 7935 pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid 7936 7937 aese v0.16b, v28.16b 7938 aesmc v0.16b, v0.16b //AES block 8k+8 - round 5 7939 aese v5.16b, v28.16b 7940 aesmc v5.16b, v5.16b //AES block 8k+13 - round 5 7941 ldp q26, q27, [x8, #96] //load rk6, rk7 7942 7943 ldr q23, [x3, #80] //load h3l | h3h 7944 ext v23.16b, v23.16b, v23.16b, #8 7945 ldr q25, [x3, #112] //load h4l | h4h 7946 ext v25.16b, v25.16b, v25.16b, #8 7947 rev64 v15.16b, v15.16b //GHASH block 8k+7 7948 rev64 v13.16b, v13.16b //GHASH block 8k+5 7949 7950.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid 7951 7952 trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid 7953 7954 aese v0.16b, v26.16b 7955 aesmc v0.16b, v0.16b //AES block 8k+8 - round 6 7956 ldr q21, [x3, #48] //load h2k | h1k 7957 ldr q24, [x3, #96] //load h4k | h3k 7958 aese v6.16b, v26.16b 7959 aesmc v6.16b, v6.16b //AES block 8k+14 - round 6 7960 7961 aese v5.16b, v26.16b 7962 aesmc v5.16b, v5.16b //AES block 8k+13 - round 6 7963 aese v7.16b, v26.16b 7964 aesmc v7.16b, v7.16b //AES block 8k+15 - round 6 7965 7966 pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high 7967 pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high 7968 pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low 7969 7970 trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - 
mid 7971 pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low 7972 trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 7973 7974 aese v7.16b, v27.16b 7975 aesmc v7.16b, v7.16b //AES block 8k+15 - round 7 7976 pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high 7977 aese v1.16b, v26.16b 7978 aesmc v1.16b, v1.16b //AES block 8k+9 - round 6 7979 7980 aese v2.16b, v26.16b 7981 aesmc v2.16b, v2.16b //AES block 8k+10 - round 6 7982 aese v3.16b, v26.16b 7983 aesmc v3.16b, v3.16b //AES block 8k+11 - round 6 7984 aese v4.16b, v26.16b 7985 aesmc v4.16b, v4.16b //AES block 8k+12 - round 6 7986 7987 ldp q28, q26, [x8, #128] //load rk8, rk9 7988 pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low 7989 aese v5.16b, v27.16b 7990 aesmc v5.16b, v5.16b //AES block 8k+13 - round 7 7991 7992 aese v1.16b, v27.16b 7993 aesmc v1.16b, v1.16b //AES block 8k+9 - round 7 7994 aese v4.16b, v27.16b 7995 aesmc v4.16b, v4.16b //AES block 8k+12 - round 7 7996 7997 aese v6.16b, v27.16b 7998 aesmc v6.16b, v6.16b //AES block 8k+14 - round 7 7999 aese v2.16b, v27.16b 8000 aesmc v2.16b, v2.16b //AES block 8k+10 - round 7 8001.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high 8002 8003 aese v0.16b, v27.16b 8004 aesmc v0.16b, v0.16b //AES block 8k+8 - round 7 8005 trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid 8006 aese v3.16b, v27.16b 8007 aesmc v3.16b, v3.16b //AES block 8k+11 - round 7 8008 8009 aese v0.16b, v28.16b 8010 aesmc v0.16b, v0.16b //AES block 8k+8 - round 8 8011 aese v7.16b, v28.16b 8012 aesmc v7.16b, v7.16b //AES block 8k+15 - round 8 8013 aese v4.16b, v28.16b 8014 aesmc v4.16b, v4.16b //AES block 8k+12 - round 8 8015 8016 aese v1.16b, v28.16b 8017 aesmc v1.16b, v1.16b //AES block 8k+9 - round 8 8018 aese v5.16b, v28.16b 8019 aesmc v5.16b, v5.16b //AES block 8k+13 - round 8 8020 aese v6.16b, v28.16b 8021 aesmc v6.16b, v6.16b //AES block 8k+14 - round 8 8022 8023 aese v3.16b, v28.16b 8024 aesmc v3.16b, v3.16b //AES block 
8k+11 - round 8 8025 aese v4.16b, v26.16b 8026 aesmc v4.16b, v4.16b //AES block 8k+12 - round 9 8027 eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 8028 8029 aese v0.16b, v26.16b 8030 aesmc v0.16b, v0.16b //AES block 8k+8 - round 9 8031 aese v1.16b, v26.16b 8032 aesmc v1.16b, v1.16b //AES block 8k+9 - round 9 8033 eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 8034 8035 aese v6.16b, v26.16b 8036 aesmc v6.16b, v6.16b //AES block 8k+14 - round 9 8037 aese v7.16b, v26.16b 8038 aesmc v7.16b, v7.16b //AES block 8k+15 - round 9 8039 pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid 8040 8041 aese v2.16b, v28.16b 8042 aesmc v2.16b, v2.16b //AES block 8k+10 - round 8 8043 pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid 8044 pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high 8045 8046 pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid 8047 pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid 8048 pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low 8049 8050 ldp q27, q28, [x8, #160] //load rk10, rk11 8051.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low 8052.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid 8053 8054 aese v2.16b, v26.16b 8055 aesmc v2.16b, v2.16b //AES block 8k+10 - round 9 8056 aese v3.16b, v26.16b 8057 aesmc v3.16b, v3.16b //AES block 8k+11 - round 9 8058 aese v5.16b, v26.16b 8059 aesmc v5.16b, v5.16b //AES block 8k+13 - round 9 8060 8061.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high 8062.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low 8063 ldr d16, [x10] //MODULO - load modulo constant 8064 8065.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid 8066 8067 aese v4.16b, v27.16b 8068 aesmc v4.16b, v4.16b //AES block 8k+12 - round 10 8069 aese v6.16b, v27.16b 8070 aesmc v6.16b, v6.16b //AES block 8k+14 - round 
10 8071 aese v5.16b, v27.16b 8072 aesmc v5.16b, v5.16b //AES block 8k+13 - round 10 8073 8074 aese v0.16b, v27.16b 8075 aesmc v0.16b, v0.16b //AES block 8k+8 - round 10 8076 aese v2.16b, v27.16b 8077 aesmc v2.16b, v2.16b //AES block 8k+10 - round 10 8078 aese v3.16b, v27.16b 8079 aesmc v3.16b, v3.16b //AES block 8k+11 - round 10 8080 8081.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 8082 8083 aese v7.16b, v27.16b 8084 aesmc v7.16b, v7.16b //AES block 8k+15 - round 10 8085 aese v1.16b, v27.16b 8086 aesmc v1.16b, v1.16b //AES block 8k+9 - round 10 8087 ldp q26, q27, [x8, #192] //load rk12, rk13 8088 8089 ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 8090 8091 aese v2.16b, v28.16b 8092 aesmc v2.16b, v2.16b //AES block 8k+10 - round 11 8093 aese v1.16b, v28.16b 8094 aesmc v1.16b, v1.16b //AES block 8k+9 - round 11 8095 aese v0.16b, v28.16b 8096 aesmc v0.16b, v0.16b //AES block 8k+8 - round 11 8097 8098 pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 8099 aese v3.16b, v28.16b 8100 aesmc v3.16b, v3.16b //AES block 8k+11 - round 11 8101 8102 aese v7.16b, v28.16b 8103 aesmc v7.16b, v7.16b //AES block 8k+15 - round 11 8104 aese v6.16b, v28.16b 8105 aesmc v6.16b, v6.16b //AES block 8k+14 - round 11 8106 aese v4.16b, v28.16b 8107 aesmc v4.16b, v4.16b //AES block 8k+12 - round 11 8108 8109 aese v5.16b, v28.16b 8110 aesmc v5.16b, v5.16b //AES block 8k+13 - round 11 8111 aese v3.16b, v26.16b 8112 aesmc v3.16b, v3.16b //AES block 8k+11 - round 12 8113 8114.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid 8115 8116 aese v3.16b, v27.16b //AES block 8k+11 - round 13 8117 aese v2.16b, v26.16b 8118 aesmc v2.16b, v2.16b //AES block 8k+10 - round 12 8119 aese v6.16b, v26.16b 8120 aesmc v6.16b, v6.16b //AES block 8k+14 - round 12 8121 8122 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 8123 aese v4.16b, v26.16b 8124 aesmc v4.16b, v4.16b //AES block 8k+12 - round 
12 8125 aese v7.16b, v26.16b 8126 aesmc v7.16b, v7.16b //AES block 8k+15 - round 12 8127 8128 aese v0.16b, v26.16b 8129 aesmc v0.16b, v0.16b //AES block 8k+8 - round 12 8130 ldr q28, [x8, #224] //load rk14 8131 aese v1.16b, v26.16b 8132 aesmc v1.16b, v1.16b //AES block 8k+9 - round 12 8133 8134 aese v4.16b, v27.16b //AES block 8k+12 - round 13 8135 ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 8136 aese v5.16b, v26.16b 8137 aesmc v5.16b, v5.16b //AES block 8k+13 - round 12 8138 8139 aese v6.16b, v27.16b //AES block 8k+14 - round 13 8140 aese v2.16b, v27.16b //AES block 8k+10 - round 13 8141 aese v1.16b, v27.16b //AES block 8k+9 - round 13 8142 8143 aese v5.16b, v27.16b //AES block 8k+13 - round 13 8144.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low 8145 add v30.4s, v30.4s, v31.4s //CTR block 8k+15 8146 8147 aese v7.16b, v27.16b //AES block 8k+15 - round 13 8148 aese v0.16b, v27.16b //AES block 8k+8 - round 13 8149.L256_dec_tail: //TAIL 8150 8151 ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag 8152 sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process 8153 cmp x5, #112 8154 8155 ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext 8156 8157 ldp q24, q25, [x3, #192] //load h8k | h7k 8158 ext v25.16b, v25.16b, v25.16b, #8 8159 mov v29.16b, v28.16b 8160 8161 ldp q20, q21, [x3, #128] //load h5l | h5h 8162 ext v20.16b, v20.16b, v20.16b, #8 8163 8164.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result 8165 ldp q22, q23, [x3, #160] //load h6l | h6h 8166 ext v22.16b, v22.16b, v22.16b, #8 8167 ext v23.16b, v23.16b, v23.16b, #8 8168 b.gt .L256_dec_blocks_more_than_7 8169 8170 mov v7.16b, v6.16b 8171 sub v30.4s, v30.4s, v31.4s 8172 mov v6.16b, v5.16b 8173 8174 mov v5.16b, v4.16b 8175 mov v4.16b, v3.16b 8176 movi v19.8b, #0 8177 8178 movi v17.8b, #0 8179 movi v18.8b, #0 8180 mov v3.16b, v2.16b 8181 8182 cmp x5, #96 8183 mov v2.16b, v1.16b 8184 b.gt 
.L256_dec_blocks_more_than_6 8185 8186 mov v7.16b, v6.16b 8187 mov v6.16b, v5.16b 8188 8189 mov v5.16b, v4.16b 8190 cmp x5, #80 8191 sub v30.4s, v30.4s, v31.4s 8192 8193 mov v4.16b, v3.16b 8194 mov v3.16b, v1.16b 8195 b.gt .L256_dec_blocks_more_than_5 8196 8197 cmp x5, #64 8198 mov v7.16b, v6.16b 8199 sub v30.4s, v30.4s, v31.4s 8200 8201 mov v6.16b, v5.16b 8202 8203 mov v5.16b, v4.16b 8204 mov v4.16b, v1.16b 8205 b.gt .L256_dec_blocks_more_than_4 8206 8207 sub v30.4s, v30.4s, v31.4s 8208 mov v7.16b, v6.16b 8209 cmp x5, #48 8210 8211 mov v6.16b, v5.16b 8212 mov v5.16b, v1.16b 8213 b.gt .L256_dec_blocks_more_than_3 8214 8215 ldr q24, [x3, #96] //load h4k | h3k 8216 sub v30.4s, v30.4s, v31.4s 8217 mov v7.16b, v6.16b 8218 8219 cmp x5, #32 8220 mov v6.16b, v1.16b 8221 b.gt .L256_dec_blocks_more_than_2 8222 8223 sub v30.4s, v30.4s, v31.4s 8224 8225 mov v7.16b, v1.16b 8226 cmp x5, #16 8227 b.gt .L256_dec_blocks_more_than_1 8228 8229 sub v30.4s, v30.4s, v31.4s 8230 ldr q21, [x3, #48] //load h2k | h1k 8231 b .L256_dec_blocks_less_than_1 8232.L256_dec_blocks_more_than_7: //blocks left > 7 8233 rev64 v8.16b, v9.16b //GHASH final-7 block 8234 ldr q9, [x0], #16 //AES final-6 block - load ciphertext 8235 st1 { v12.16b}, [x2], #16 //AES final-7 block - store result 8236 8237 ins v18.d[0], v24.d[1] //GHASH final-7 block - mid 8238 8239 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8240 8241 ins v27.d[0], v8.d[1] //GHASH final-7 block - mid 8242.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result 8243 8244 pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high 8245 8246 eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid 8247 movi v16.8b, #0 //suppress further partial tag feed in 8248 8249 pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low 8250 pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid 8251.L256_dec_blocks_more_than_6: //blocks left > 6 8252 8253 rev64 v8.16b, v9.16b //GHASH final-6 block 8254 8255 eor v8.16b, v8.16b, 
v16.16b //feed in partial tag 8256 ldr q9, [x0], #16 //AES final-5 block - load ciphertext 8257 movi v16.8b, #0 //suppress further partial tag feed in 8258 8259 ins v27.d[0], v8.d[1] //GHASH final-6 block - mid 8260 st1 { v12.16b}, [x2], #16 //AES final-6 block - store result 8261 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high 8262 8263 pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low 8264 8265.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result 8266 eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low 8267 eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid 8268 8269 pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid 8270 8271 eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid 8272 eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high 8273.L256_dec_blocks_more_than_5: //blocks left > 5 8274 8275 rev64 v8.16b, v9.16b //GHASH final-5 block 8276 8277 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8278 8279 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high 8280 ins v27.d[0], v8.d[1] //GHASH final-5 block - mid 8281 8282 ldr q9, [x0], #16 //AES final-4 block - load ciphertext 8283 8284 eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid 8285 st1 { v12.16b}, [x2], #16 //AES final-5 block - store result 8286 8287 pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low 8288 ins v27.d[1], v27.d[0] //GHASH final-5 block - mid 8289 8290 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid 8291 8292 eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high 8293.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result 8294 eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low 8295 8296 eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid 8297 movi v16.8b, #0 //suppress further partial tag feed in 8298.L256_dec_blocks_more_than_4: //blocks left > 4 8299 8300 rev64 v8.16b, v9.16b //GHASH final-4 block 8301 8302 eor v8.16b, v8.16b, v16.16b //feed in 
partial tag 8303 8304 ins v27.d[0], v8.d[1] //GHASH final-4 block - mid 8305 ldr q9, [x0], #16 //AES final-3 block - load ciphertext 8306 8307 movi v16.8b, #0 //suppress further partial tag feed in 8308 8309 pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low 8310 pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high 8311 8312 eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid 8313 8314 eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high 8315 8316 pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid 8317 8318 eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low 8319 st1 { v12.16b}, [x2], #16 //AES final-4 block - store result 8320 8321 eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid 8322.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result 8323.L256_dec_blocks_more_than_3: //blocks left > 3 8324 8325 ldr q25, [x3, #112] //load h4l | h4h 8326 ext v25.16b, v25.16b, v25.16b, #8 8327 rev64 v8.16b, v9.16b //GHASH final-3 block 8328 8329 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8330 ldr q9, [x0], #16 //AES final-2 block - load ciphertext 8331 ldr q24, [x3, #96] //load h4k | h3k 8332 8333 ins v27.d[0], v8.d[1] //GHASH final-3 block - mid 8334 st1 { v12.16b}, [x2], #16 //AES final-3 block - store result 8335 8336.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result 8337 8338 eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid 8339 8340 ins v27.d[1], v27.d[0] //GHASH final-3 block - mid 8341 pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low 8342 pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high 8343 8344 movi v16.8b, #0 //suppress further partial tag feed in 8345 pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid 8346 eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low 8347 8348 eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high 8349 8350 eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid 8351.L256_dec_blocks_more_than_2: 
//blocks left > 2 8352 8353 rev64 v8.16b, v9.16b //GHASH final-2 block 8354 8355 ldr q23, [x3, #80] //load h3l | h3h 8356 ext v23.16b, v23.16b, v23.16b, #8 8357 ldr q9, [x0], #16 //AES final-1 block - load ciphertext 8358 8359 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8360 8361 ins v27.d[0], v8.d[1] //GHASH final-2 block - mid 8362 8363 pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low 8364 st1 { v12.16b}, [x2], #16 //AES final-2 block - store result 8365.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result 8366 8367 eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid 8368 eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low 8369 movi v16.8b, #0 //suppress further partial tag feed in 8370 8371 pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid 8372 pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high 8373 8374 eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid 8375 eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high 8376.L256_dec_blocks_more_than_1: //blocks left > 1 8377 8378 rev64 v8.16b, v9.16b //GHASH final-1 block 8379 8380 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8381 8382 ins v27.d[0], v8.d[1] //GHASH final-1 block - mid 8383 ldr q22, [x3, #64] //load h2l | h2h 8384 ext v22.16b, v22.16b, v22.16b, #8 8385 8386 eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid 8387 ldr q9, [x0], #16 //AES final block - load ciphertext 8388 st1 { v12.16b}, [x2], #16 //AES final-1 block - store result 8389 8390 ldr q21, [x3, #48] //load h2k | h1k 8391 pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low 8392 8393 ins v27.d[1], v27.d[0] //GHASH final-1 block - mid 8394 8395 eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low 8396 8397.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result 8398 pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high 8399 8400 pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid 8401 8402 movi v16.8b, #0 //suppress 
further partial tag feed in 8403 eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high 8404 8405 eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid 8406.L256_dec_blocks_less_than_1: //blocks left <= 1 8407 8408 ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored 8409 mvn x6, xzr //temp0_x = 0xffffffffffffffff 8410 and x1, x1, #127 //bit_length %= 128 8411 8412 sub x1, x1, #128 //bit_length -= 128 8413 rev32 v30.16b, v30.16b 8414 str q30, [x16] //store the updated counter 8415 8416 neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128]) 8417 8418 and x1, x1, #127 //bit_length %= 128 8419 8420 lsr x6, x6, x1 //temp0_x is mask for top 64b of last block 8421 cmp x1, #64 8422 mvn x7, xzr //temp1_x = 0xffffffffffffffff 8423 8424 csel x14, x6, xzr, lt 8425 csel x13, x7, x6, lt 8426 8427 mov v0.d[0], x13 //ctr0b is mask for last block 8428 mov v0.d[1], x14 8429 8430 and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits 8431 ldr q20, [x3, #32] //load h1l | h1h 8432 ext v20.16b, v20.16b, v20.16b, #8 8433 bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing 8434 8435 rev64 v8.16b, v9.16b //GHASH final block 8436 8437 eor v8.16b, v8.16b, v16.16b //feed in partial tag 8438 8439 ins v16.d[0], v8.d[1] //GHASH final block - mid 8440 pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high 8441 8442 eor v16.8b, v16.8b, v8.8b //GHASH final block - mid 8443 8444 pmull v26.1q, v8.1d, v20.1d //GHASH final block - low 8445 eor v17.16b, v17.16b, v28.16b //GHASH final block - high 8446 8447 pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid 8448 8449 eor v18.16b, v18.16b, v16.16b //GHASH final block - mid 8450 ldr d16, [x10] //MODULO - load modulo constant 8451 eor v19.16b, v19.16b, v26.16b //GHASH final block - low 8452 8453 pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid 8454 eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up 8455 
8456 ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment 8457 st1 { v12.16b}, [x2] //store all 16B 8458 8459 eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up 8460 8461 eor v21.16b, v17.16b, v21.16b //MODULO - fold into mid 8462 eor v18.16b, v18.16b, v21.16b //MODULO - fold into mid 8463 8464 pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low 8465 8466 ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment 8467 eor v19.16b, v19.16b, v17.16b //MODULO - fold into low 8468 8469 eor v19.16b, v19.16b, v18.16b //MODULO - fold into low 8470 ext v19.16b, v19.16b, v19.16b, #8 8471 rev64 v19.16b, v19.16b 8472 st1 { v19.16b }, [x3] 8473 mov x0, x9 8474 8475 ldp d10, d11, [sp, #16] 8476 ldp d12, d13, [sp, #32] 8477 ldp d14, d15, [sp, #48] 8478 ldp d8, d9, [sp], #80 8479 ret 8480 8481.L256_dec_ret: 8482 mov w0, #0x0 8483 ret 8484.size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel 8485.byte 65,69,83,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,65,82,77,118,56,44,32,83,80,68,88,32,66,83,68,45,51,45,67,108,97,117,115,101,32,98,121,32,60,120,105,97,111,107,97,110,103,46,113,105,97,110,64,97,114,109,46,99,111,109,62,0 8486.align 2 8487.align 2 8488#endif 8489