/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//		SHA256-hw	SHA256(*)	SHA512
// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
// Denver	2.01		10.5 (+26%)	6.70 (+8%)
// X-Gene			20.0 (+100%)	12.8 (+300%(***))
// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
// Kryo		1.92		17.4 (+30%)	11.2 (+8%)
//
// (*)	Software SHA256 results are of lesser relevance, presented
//	mostly for informational purposes.
// (**)	The result is a trade-off: it's possible to improve it by
//	10% (or by 1 cycle per round), but at the cost of 20% loss
//	on Cortex-A53 (or by 4 cycles per round).
// (***)	Super-impressive coefficients over gcc-generated code are
//	indication of some compiler "pathology", most notably code
//	generated with -mgeneral-regs-only is significantly faster
//	and the gap is only 40-90%.
//
// October 2016.
//
// Originally it was reckoned that it makes no sense to implement NEON
// version of SHA256 for 64-bit processors. This is because performance
// improvement on most wide-spread Cortex-A5x processors was observed
// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
// observed that 32-bit NEON SHA256 performs significantly better than
// 64-bit scalar version on *some* of the more recent processors. As
// result 64-bit NEON version of SHA256 was added to provide best
// all-round performance. For example it executes ~30% faster on X-Gene
// and Mongoose. [For reference, NEON version of SHA512 is bound to
// deliver much less improvement, likely *negative* on Cortex-A5x.
// Which is why NEON support is limited to SHA256.]

#ifndef	__KERNEL__
# include "arm_arch.h"
#endif

.text

// --------------------------------------------------------------------
// sha512_block_data_order: SHA-512 block transform, general-register
// implementation.
//
// In:	x0 = pointer to the eight 64-bit hash state words (loaded on
//	     entry, updated in place after every block)
//	x1 = input pointer
//	x2 = number of 128-byte input blocks
//
// NOTE: the end-of-input test (cmp x1,x2 / b.ne .Loop) is made only
// after a block has been processed, so x2 is expected to be non-zero.
//
// Unless built with __KERNEL__, OPENSSL_armcap_P is tested first and
// control transfers to .Lv8_entry when the ARMV8_SHA512 bit is set.
//
// Register use in the scalar path:
//	x20-x27		working variables a..h
//	x30		pointer into .LK512 (restored from the frame on exit)
//	x19,x28		round constant and Maj() carry-over, swapping roles
//			every round
//	x16,x17		scratch
//	x3-x15,x0-x2	message schedule words X[0..15]
//
// Frame: 128 bytes addressed through x29 holding x29/x30 at #0,
// x19-x28 at #16..#95, x0 and the end-of-input pointer at #96 and the
// input pointer at #112; a further 32 bytes below the frame hold the
// temporaries spilled at [sp,#0]..[sp,#24].
// --------------------------------------------------------------------

.hidden	OPENSSL_armcap_P
.globl	sha512_block_data_order
.type	sha512_block_data_order,%function
.align	6
sha512_block_data_order:
#ifndef	__KERNEL__
# ifdef	__ILP32__
	ldrsw	x16,.LOPENSSL_armcap_P
# else
	ldr	x16,.LOPENSSL_armcap_P
# endif
	adr	x17,.LOPENSSL_armcap_P
	add	x16,x16,x17
	ldr	w16,[x16]
	tst	w16,#ARMV8_SHA512
	b.ne	.Lv8_entry
#endif
.inst	0xd503233f	// paciasp
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]

.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]
#ifndef	__AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx	// zero constant fetched = .LK512 terminator

	// Block finished: reload the context and end-of-input pointers and
	// the saved input pointer, then fold a..h back into the context.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
.inst	0xd50323bf	// autiasp
	ret
.size	sha512_block_data_order,.-sha512_block_data_order

// Round-constant table: 80 64-bit constants followed by a zero quad.
// The scalar loop above exits when the constant it fetches is zero and
// then rewinds its table pointer by 648 bytes (81 quads).
.align	6
.type	.LK512,%object
.LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
#ifndef	__KERNEL__
.align	3
.LOPENSSL_armcap_P:
# ifdef	__ILP32__
.long	OPENSSL_armcap_P-.
# else
.quad	OPENSSL_armcap_P-.
# endif
#endif
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#ifndef	__KERNEL__
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]	// load context
	adr	x3,.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b	// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	csel	x1,x1,x4,ne	// conditional rewind
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d	// "T1 + H + K512[i]"
.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d	// "T1 + H + K512[i]"
.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d	// "T1 + H + K512[i]"
.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
.inst	0xce618480
//sha512h2 v0.16b,v4.16b,v1.16b 1196 add v25.2d,v25.2d,v21.2d 1197 ld1 {v24.2d},[x3],#16 1198 ext v25.16b,v25.16b,v25.16b,#8 1199 ext v5.16b,v2.16b,v3.16b,#8 1200 ext v6.16b,v1.16b,v2.16b,#8 1201 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1202.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1203 ext v7.16b,v17.16b,v18.16b,#8 1204.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1205.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1206 add v4.2d,v1.2d,v3.2d // "D + T1" 1207.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1208 add v24.2d,v24.2d,v22.2d 1209 ld1 {v25.2d},[x3],#16 1210 ext v24.16b,v24.16b,v24.16b,#8 1211 ext v5.16b,v4.16b,v2.16b,#8 1212 ext v6.16b,v0.16b,v4.16b,#8 1213 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1214.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1215 ext v7.16b,v18.16b,v19.16b,#8 1216.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1217.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1218 add v1.2d,v0.2d,v2.2d // "D + T1" 1219.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1220 add v25.2d,v25.2d,v23.2d 1221 ld1 {v24.2d},[x3],#16 1222 ext v25.16b,v25.16b,v25.16b,#8 1223 ext v5.16b,v1.16b,v4.16b,#8 1224 ext v6.16b,v3.16b,v1.16b,#8 1225 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1226.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1227 ext v7.16b,v19.16b,v20.16b,#8 1228.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1229.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1230 add v0.2d,v3.2d,v4.2d // "D + T1" 1231.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1232 add v24.2d,v24.2d,v16.2d 1233 ld1 {v25.2d},[x3],#16 1234 ext v24.16b,v24.16b,v24.16b,#8 1235 ext v5.16b,v0.16b,v1.16b,#8 1236 ext v6.16b,v2.16b,v0.16b,#8 1237 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1238.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1239 ext v7.16b,v20.16b,v21.16b,#8 1240.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1241.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1242 add v3.2d,v2.2d,v1.2d // "D + T1" 1243.inst 0xce648441 //sha512h2 
v1.16b,v2.16b,v4.16b 1244 add v25.2d,v25.2d,v17.2d 1245 ld1 {v24.2d},[x3],#16 1246 ext v25.16b,v25.16b,v25.16b,#8 1247 ext v5.16b,v3.16b,v0.16b,#8 1248 ext v6.16b,v4.16b,v3.16b,#8 1249 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1250.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1251 ext v7.16b,v21.16b,v22.16b,#8 1252.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1253.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1254 add v2.2d,v4.2d,v0.2d // "D + T1" 1255.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1256 add v24.2d,v24.2d,v18.2d 1257 ld1 {v25.2d},[x3],#16 1258 ext v24.16b,v24.16b,v24.16b,#8 1259 ext v5.16b,v2.16b,v3.16b,#8 1260 ext v6.16b,v1.16b,v2.16b,#8 1261 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1262.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1263 ext v7.16b,v22.16b,v23.16b,#8 1264.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1265.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1266 add v4.2d,v1.2d,v3.2d // "D + T1" 1267.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1268 add v25.2d,v25.2d,v19.2d 1269 ld1 {v24.2d},[x3],#16 1270 ext v25.16b,v25.16b,v25.16b,#8 1271 ext v5.16b,v4.16b,v2.16b,#8 1272 ext v6.16b,v0.16b,v4.16b,#8 1273 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1274.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1275 ext v7.16b,v23.16b,v16.16b,#8 1276.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1277.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1278 add v1.2d,v0.2d,v2.2d // "D + T1" 1279.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1280 add v24.2d,v24.2d,v20.2d 1281 ld1 {v25.2d},[x3],#16 1282 ext v24.16b,v24.16b,v24.16b,#8 1283 ext v5.16b,v1.16b,v4.16b,#8 1284 ext v6.16b,v3.16b,v1.16b,#8 1285 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1286.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1287 ext v7.16b,v16.16b,v17.16b,#8 1288.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1289.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1290 add v0.2d,v3.2d,v4.2d // "D + T1" 1291.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1292 
add v25.2d,v25.2d,v21.2d 1293 ld1 {v24.2d},[x3],#16 1294 ext v25.16b,v25.16b,v25.16b,#8 1295 ext v5.16b,v0.16b,v1.16b,#8 1296 ext v6.16b,v2.16b,v0.16b,#8 1297 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1298.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1299 ext v7.16b,v17.16b,v18.16b,#8 1300.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1301.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1302 add v3.2d,v2.2d,v1.2d // "D + T1" 1303.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1304 add v24.2d,v24.2d,v22.2d 1305 ld1 {v25.2d},[x3],#16 1306 ext v24.16b,v24.16b,v24.16b,#8 1307 ext v5.16b,v3.16b,v0.16b,#8 1308 ext v6.16b,v4.16b,v3.16b,#8 1309 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1310.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1311 ext v7.16b,v18.16b,v19.16b,#8 1312.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1313.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1314 add v2.2d,v4.2d,v0.2d // "D + T1" 1315.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1316 add v25.2d,v25.2d,v23.2d 1317 ld1 {v24.2d},[x3],#16 1318 ext v25.16b,v25.16b,v25.16b,#8 1319 ext v5.16b,v2.16b,v3.16b,#8 1320 ext v6.16b,v1.16b,v2.16b,#8 1321 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1322.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1323 ext v7.16b,v19.16b,v20.16b,#8 1324.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1325.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1326 add v4.2d,v1.2d,v3.2d // "D + T1" 1327.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1328 add v24.2d,v24.2d,v16.2d 1329 ld1 {v25.2d},[x3],#16 1330 ext v24.16b,v24.16b,v24.16b,#8 1331 ext v5.16b,v4.16b,v2.16b,#8 1332 ext v6.16b,v0.16b,v4.16b,#8 1333 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1334.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1335 ext v7.16b,v20.16b,v21.16b,#8 1336.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1337.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1338 add v1.2d,v0.2d,v2.2d // "D + T1" 1339.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1340 add v25.2d,v25.2d,v17.2d 
1341 ld1 {v24.2d},[x3],#16 1342 ext v25.16b,v25.16b,v25.16b,#8 1343 ext v5.16b,v1.16b,v4.16b,#8 1344 ext v6.16b,v3.16b,v1.16b,#8 1345 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1346.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1347 ext v7.16b,v21.16b,v22.16b,#8 1348.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1349.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1350 add v0.2d,v3.2d,v4.2d // "D + T1" 1351.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1352 add v24.2d,v24.2d,v18.2d 1353 ld1 {v25.2d},[x3],#16 1354 ext v24.16b,v24.16b,v24.16b,#8 1355 ext v5.16b,v0.16b,v1.16b,#8 1356 ext v6.16b,v2.16b,v0.16b,#8 1357 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1358.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1359 ext v7.16b,v22.16b,v23.16b,#8 1360.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1361.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1362 add v3.2d,v2.2d,v1.2d // "D + T1" 1363.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1364 add v25.2d,v25.2d,v19.2d 1365 ld1 {v24.2d},[x3],#16 1366 ext v25.16b,v25.16b,v25.16b,#8 1367 ext v5.16b,v3.16b,v0.16b,#8 1368 ext v6.16b,v4.16b,v3.16b,#8 1369 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1370.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1371 ext v7.16b,v23.16b,v16.16b,#8 1372.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1373.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1374 add v2.2d,v4.2d,v0.2d // "D + T1" 1375.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1376 add v24.2d,v24.2d,v20.2d 1377 ld1 {v25.2d},[x3],#16 1378 ext v24.16b,v24.16b,v24.16b,#8 1379 ext v5.16b,v2.16b,v3.16b,#8 1380 ext v6.16b,v1.16b,v2.16b,#8 1381 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1382.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1383 ext v7.16b,v16.16b,v17.16b,#8 1384.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1385.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1386 add v4.2d,v1.2d,v3.2d // "D + T1" 1387.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1388 add v25.2d,v25.2d,v21.2d 1389 ld1 {v24.2d},[x3],#16 
1390 ext v25.16b,v25.16b,v25.16b,#8 1391 ext v5.16b,v4.16b,v2.16b,#8 1392 ext v6.16b,v0.16b,v4.16b,#8 1393 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1394.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1395 ext v7.16b,v17.16b,v18.16b,#8 1396.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1397.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1398 add v1.2d,v0.2d,v2.2d // "D + T1" 1399.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1400 add v24.2d,v24.2d,v22.2d 1401 ld1 {v25.2d},[x3],#16 1402 ext v24.16b,v24.16b,v24.16b,#8 1403 ext v5.16b,v1.16b,v4.16b,#8 1404 ext v6.16b,v3.16b,v1.16b,#8 1405 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1406.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1407 ext v7.16b,v18.16b,v19.16b,#8 1408.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1409.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1410 add v0.2d,v3.2d,v4.2d // "D + T1" 1411.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1412 add v25.2d,v25.2d,v23.2d 1413 ld1 {v24.2d},[x3],#16 1414 ext v25.16b,v25.16b,v25.16b,#8 1415 ext v5.16b,v0.16b,v1.16b,#8 1416 ext v6.16b,v2.16b,v0.16b,#8 1417 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1418.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1419 ext v7.16b,v19.16b,v20.16b,#8 1420.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1421.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1422 add v3.2d,v2.2d,v1.2d // "D + T1" 1423.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1424 add v24.2d,v24.2d,v16.2d 1425 ld1 {v25.2d},[x3],#16 1426 ext v24.16b,v24.16b,v24.16b,#8 1427 ext v5.16b,v3.16b,v0.16b,#8 1428 ext v6.16b,v4.16b,v3.16b,#8 1429 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1430.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1431 ext v7.16b,v20.16b,v21.16b,#8 1432.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1433.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1434 add v2.2d,v4.2d,v0.2d // "D + T1" 1435.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1436 add v25.2d,v25.2d,v17.2d 1437 ld1 {v24.2d},[x3],#16 1438 ext 
v25.16b,v25.16b,v25.16b,#8 1439 ext v5.16b,v2.16b,v3.16b,#8 1440 ext v6.16b,v1.16b,v2.16b,#8 1441 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1442.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1443 ext v7.16b,v21.16b,v22.16b,#8 1444.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1445.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1446 add v4.2d,v1.2d,v3.2d // "D + T1" 1447.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1448 add v24.2d,v24.2d,v18.2d 1449 ld1 {v25.2d},[x3],#16 1450 ext v24.16b,v24.16b,v24.16b,#8 1451 ext v5.16b,v4.16b,v2.16b,#8 1452 ext v6.16b,v0.16b,v4.16b,#8 1453 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1454.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1455 ext v7.16b,v22.16b,v23.16b,#8 1456.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1457.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1458 add v1.2d,v0.2d,v2.2d // "D + T1" 1459.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1460 add v25.2d,v25.2d,v19.2d 1461 ld1 {v24.2d},[x3],#16 1462 ext v25.16b,v25.16b,v25.16b,#8 1463 ext v5.16b,v1.16b,v4.16b,#8 1464 ext v6.16b,v3.16b,v1.16b,#8 1465 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1466.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1467 ext v7.16b,v23.16b,v16.16b,#8 1468.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1469.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1470 add v0.2d,v3.2d,v4.2d // "D + T1" 1471.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1472 add v24.2d,v24.2d,v20.2d 1473 ld1 {v25.2d},[x3],#16 1474 ext v24.16b,v24.16b,v24.16b,#8 1475 ext v5.16b,v0.16b,v1.16b,#8 1476 ext v6.16b,v2.16b,v0.16b,#8 1477 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1478.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1479 ext v7.16b,v16.16b,v17.16b,#8 1480.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1481.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1482 add v3.2d,v2.2d,v1.2d // "D + T1" 1483.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1484 add v25.2d,v25.2d,v21.2d 1485 ld1 {v24.2d},[x3],#16 1486 ext v25.16b,v25.16b,v25.16b,#8 
1487 ext v5.16b,v3.16b,v0.16b,#8 1488 ext v6.16b,v4.16b,v3.16b,#8 1489 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1490.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1491 ext v7.16b,v17.16b,v18.16b,#8 1492.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1493.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1494 add v2.2d,v4.2d,v0.2d // "D + T1" 1495.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1496 add v24.2d,v24.2d,v22.2d 1497 ld1 {v25.2d},[x3],#16 1498 ext v24.16b,v24.16b,v24.16b,#8 1499 ext v5.16b,v2.16b,v3.16b,#8 1500 ext v6.16b,v1.16b,v2.16b,#8 1501 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1502.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1503 ext v7.16b,v18.16b,v19.16b,#8 1504.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1505.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1506 add v4.2d,v1.2d,v3.2d // "D + T1" 1507.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1508 add v25.2d,v25.2d,v23.2d 1509 ld1 {v24.2d},[x3],#16 1510 ext v25.16b,v25.16b,v25.16b,#8 1511 ext v5.16b,v4.16b,v2.16b,#8 1512 ext v6.16b,v0.16b,v4.16b,#8 1513 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1514.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1515 ext v7.16b,v19.16b,v20.16b,#8 1516.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1517.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1518 add v1.2d,v0.2d,v2.2d // "D + T1" 1519.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1520 ld1 {v25.2d},[x3],#16 1521 add v24.2d,v24.2d,v16.2d 1522 ld1 {v16.16b},[x1],#16 // load next input 1523 ext v24.16b,v24.16b,v24.16b,#8 1524 ext v5.16b,v1.16b,v4.16b,#8 1525 ext v6.16b,v3.16b,v1.16b,#8 1526 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1527.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1528 rev64 v16.16b,v16.16b 1529 add v0.2d,v3.2d,v4.2d // "D + T1" 1530.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1531 ld1 {v24.2d},[x3],#16 1532 add v25.2d,v25.2d,v17.2d 1533 ld1 {v17.16b},[x1],#16 // load next input 1534 ext v25.16b,v25.16b,v25.16b,#8 1535 ext v5.16b,v0.16b,v1.16b,#8 1536 ext 
v6.16b,v2.16b,v0.16b,#8 1537 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1538.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1539 rev64 v17.16b,v17.16b 1540 add v3.2d,v2.2d,v1.2d // "D + T1" 1541.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1542 ld1 {v25.2d},[x3],#16 1543 add v24.2d,v24.2d,v18.2d 1544 ld1 {v18.16b},[x1],#16 // load next input 1545 ext v24.16b,v24.16b,v24.16b,#8 1546 ext v5.16b,v3.16b,v0.16b,#8 1547 ext v6.16b,v4.16b,v3.16b,#8 1548 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1549.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1550 rev64 v18.16b,v18.16b 1551 add v2.2d,v4.2d,v0.2d // "D + T1" 1552.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1553 ld1 {v24.2d},[x3],#16 1554 add v25.2d,v25.2d,v19.2d 1555 ld1 {v19.16b},[x1],#16 // load next input 1556 ext v25.16b,v25.16b,v25.16b,#8 1557 ext v5.16b,v2.16b,v3.16b,#8 1558 ext v6.16b,v1.16b,v2.16b,#8 1559 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1560.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1561 rev64 v19.16b,v19.16b 1562 add v4.2d,v1.2d,v3.2d // "D + T1" 1563.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1564 ld1 {v25.2d},[x3],#16 1565 add v24.2d,v24.2d,v20.2d 1566 ld1 {v20.16b},[x1],#16 // load next input 1567 ext v24.16b,v24.16b,v24.16b,#8 1568 ext v5.16b,v4.16b,v2.16b,#8 1569 ext v6.16b,v0.16b,v4.16b,#8 1570 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1571.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1572 rev64 v20.16b,v20.16b 1573 add v1.2d,v0.2d,v2.2d // "D + T1" 1574.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1575 ld1 {v24.2d},[x3],#16 1576 add v25.2d,v25.2d,v21.2d 1577 ld1 {v21.16b},[x1],#16 // load next input 1578 ext v25.16b,v25.16b,v25.16b,#8 1579 ext v5.16b,v1.16b,v4.16b,#8 1580 ext v6.16b,v3.16b,v1.16b,#8 1581 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1582.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1583 rev64 v21.16b,v21.16b 1584 add v0.2d,v3.2d,v4.2d // "D + T1" 1585.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1586 ld1 {v25.2d},[x3],#16 1587 add 
v24.2d,v24.2d,v22.2d 1588 ld1 {v22.16b},[x1],#16 // load next input 1589 ext v24.16b,v24.16b,v24.16b,#8 1590 ext v5.16b,v0.16b,v1.16b,#8 1591 ext v6.16b,v2.16b,v0.16b,#8 1592 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1593.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1594 rev64 v22.16b,v22.16b 1595 add v3.2d,v2.2d,v1.2d // "D + T1" 1596.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1597 sub x3,x3,#80*8 // rewind 1598 add v25.2d,v25.2d,v23.2d 1599 ld1 {v23.16b},[x1],#16 // load next input 1600 ext v25.16b,v25.16b,v25.16b,#8 1601 ext v5.16b,v3.16b,v0.16b,#8 1602 ext v6.16b,v4.16b,v3.16b,#8 1603 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1604.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1605 rev64 v23.16b,v23.16b 1606 add v2.2d,v4.2d,v0.2d // "D + T1" 1607.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1608 add v0.2d,v0.2d,v26.2d // accumulate 1609 add v1.2d,v1.2d,v27.2d 1610 add v2.2d,v2.2d,v28.2d 1611 add v3.2d,v3.2d,v29.2d 1612 1613 cbnz x2,.Loop_hw 1614 1615 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1616 1617 ldr x29,[sp],#16 1618 ret 1619.size sha512_block_armv8,.-sha512_block_armv8 1620#endif 1621