/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//              SHA256-hw   SHA256(*)       SHA512
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
// ThunderX2    2.54        13.2 (+40%)     8.40 (+18%)
//
// (*)   Software SHA256 results are of lesser relevance, presented
//       mostly for informational purposes.
// (**)  The result is a trade-off: it's possible to improve it by
//       10% (or by 1 cycle per round), but at the cost of 20% loss
//       on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
//       an indication of some compiler "pathology", most notably code
//       generated with -mgeneral-regs-only is significantly faster
//       and the gap is only 40-90%.
//
// October 2016.
44// 45// Originally it was reckoned that it makes no sense to implement NEON 46// version of SHA256 for 64-bit processors. This is because performance 47// improvement on most wide-spread Cortex-A5x processors was observed 48// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was 49// observed that 32-bit NEON SHA256 performs significantly better than 50// 64-bit scalar version on *some* of the more recent processors. As 51// result 64-bit NEON version of SHA256 was added to provide best 52// all-round performance. For example it executes ~30% faster on X-Gene 53// and Mongoose. [For reference, NEON version of SHA512 is bound to 54// deliver much less improvement, likely *negative* on Cortex-A5x. 55// Which is why NEON support is limited to SHA256.] 56 57// $output is the last argument if it looks like a file (it has an extension) 58// $flavour is the first argument if it doesn't look like a file 59#include "arm_arch.h" 60#ifndef __KERNEL__ 61 62.hidden OPENSSL_armcap_P 63#endif 64 65.text 66 67.globl sha512_block_data_order 68.type sha512_block_data_order,%function 69.align 6 70sha512_block_data_order: 71 AARCH64_VALID_CALL_TARGET 72#ifndef __KERNEL__ 73 adrp x16,OPENSSL_armcap_P 74 ldr w16,[x16,#:lo12:OPENSSL_armcap_P] 75 tst w16,#ARMV8_SHA512 76 b.ne .Lv8_entry 77#endif 78 AARCH64_SIGN_LINK_REGISTER 79 stp x29,x30,[sp,#-128]! 
80 add x29,sp,#0 81 82 stp x19,x20,[sp,#16] 83 stp x21,x22,[sp,#32] 84 stp x23,x24,[sp,#48] 85 stp x25,x26,[sp,#64] 86 stp x27,x28,[sp,#80] 87 sub sp,sp,#4*8 88 89 ldp x20,x21,[x0] // load context 90 ldp x22,x23,[x0,#2*8] 91 ldp x24,x25,[x0,#4*8] 92 add x2,x1,x2,lsl#7 // end of input 93 ldp x26,x27,[x0,#6*8] 94 adrp x30,.LK512 95 add x30,x30,#:lo12:.LK512 96 stp x0,x2,[x29,#96] 97 98.Loop: 99 ldp x3,x4,[x1],#2*8 100 ldr x19,[x30],#8 // *K++ 101 eor x28,x21,x22 // magic seed 102 str x1,[x29,#112] 103#ifndef __AARCH64EB__ 104 rev x3,x3 // 0 105#endif 106 ror x16,x24,#14 107 add x27,x27,x19 // h+=K[i] 108 eor x6,x24,x24,ror#23 109 and x17,x25,x24 110 bic x19,x26,x24 111 add x27,x27,x3 // h+=X[i] 112 orr x17,x17,x19 // Ch(e,f,g) 113 eor x19,x20,x21 // a^b, b^c in next round 114 eor x16,x16,x6,ror#18 // Sigma1(e) 115 ror x6,x20,#28 116 add x27,x27,x17 // h+=Ch(e,f,g) 117 eor x17,x20,x20,ror#5 118 add x27,x27,x16 // h+=Sigma1(e) 119 and x28,x28,x19 // (b^c)&=(a^b) 120 add x23,x23,x27 // d+=h 121 eor x28,x28,x21 // Maj(a,b,c) 122 eor x17,x6,x17,ror#34 // Sigma0(a) 123 add x27,x27,x28 // h+=Maj(a,b,c) 124 ldr x28,[x30],#8 // *K++, x19 in next round 125 //add x27,x27,x17 // h+=Sigma0(a) 126#ifndef __AARCH64EB__ 127 rev x4,x4 // 1 128#endif 129 ldp x5,x6,[x1],#2*8 130 add x27,x27,x17 // h+=Sigma0(a) 131 ror x16,x23,#14 132 add x26,x26,x28 // h+=K[i] 133 eor x7,x23,x23,ror#23 134 and x17,x24,x23 135 bic x28,x25,x23 136 add x26,x26,x4 // h+=X[i] 137 orr x17,x17,x28 // Ch(e,f,g) 138 eor x28,x27,x20 // a^b, b^c in next round 139 eor x16,x16,x7,ror#18 // Sigma1(e) 140 ror x7,x27,#28 141 add x26,x26,x17 // h+=Ch(e,f,g) 142 eor x17,x27,x27,ror#5 143 add x26,x26,x16 // h+=Sigma1(e) 144 and x19,x19,x28 // (b^c)&=(a^b) 145 add x22,x22,x26 // d+=h 146 eor x19,x19,x20 // Maj(a,b,c) 147 eor x17,x7,x17,ror#34 // Sigma0(a) 148 add x26,x26,x19 // h+=Maj(a,b,c) 149 ldr x19,[x30],#8 // *K++, x28 in next round 150 //add x26,x26,x17 // h+=Sigma0(a) 151#ifndef __AARCH64EB__ 152 rev x5,x5 // 2 
153#endif 154 add x26,x26,x17 // h+=Sigma0(a) 155 ror x16,x22,#14 156 add x25,x25,x19 // h+=K[i] 157 eor x8,x22,x22,ror#23 158 and x17,x23,x22 159 bic x19,x24,x22 160 add x25,x25,x5 // h+=X[i] 161 orr x17,x17,x19 // Ch(e,f,g) 162 eor x19,x26,x27 // a^b, b^c in next round 163 eor x16,x16,x8,ror#18 // Sigma1(e) 164 ror x8,x26,#28 165 add x25,x25,x17 // h+=Ch(e,f,g) 166 eor x17,x26,x26,ror#5 167 add x25,x25,x16 // h+=Sigma1(e) 168 and x28,x28,x19 // (b^c)&=(a^b) 169 add x21,x21,x25 // d+=h 170 eor x28,x28,x27 // Maj(a,b,c) 171 eor x17,x8,x17,ror#34 // Sigma0(a) 172 add x25,x25,x28 // h+=Maj(a,b,c) 173 ldr x28,[x30],#8 // *K++, x19 in next round 174 //add x25,x25,x17 // h+=Sigma0(a) 175#ifndef __AARCH64EB__ 176 rev x6,x6 // 3 177#endif 178 ldp x7,x8,[x1],#2*8 179 add x25,x25,x17 // h+=Sigma0(a) 180 ror x16,x21,#14 181 add x24,x24,x28 // h+=K[i] 182 eor x9,x21,x21,ror#23 183 and x17,x22,x21 184 bic x28,x23,x21 185 add x24,x24,x6 // h+=X[i] 186 orr x17,x17,x28 // Ch(e,f,g) 187 eor x28,x25,x26 // a^b, b^c in next round 188 eor x16,x16,x9,ror#18 // Sigma1(e) 189 ror x9,x25,#28 190 add x24,x24,x17 // h+=Ch(e,f,g) 191 eor x17,x25,x25,ror#5 192 add x24,x24,x16 // h+=Sigma1(e) 193 and x19,x19,x28 // (b^c)&=(a^b) 194 add x20,x20,x24 // d+=h 195 eor x19,x19,x26 // Maj(a,b,c) 196 eor x17,x9,x17,ror#34 // Sigma0(a) 197 add x24,x24,x19 // h+=Maj(a,b,c) 198 ldr x19,[x30],#8 // *K++, x28 in next round 199 //add x24,x24,x17 // h+=Sigma0(a) 200#ifndef __AARCH64EB__ 201 rev x7,x7 // 4 202#endif 203 add x24,x24,x17 // h+=Sigma0(a) 204 ror x16,x20,#14 205 add x23,x23,x19 // h+=K[i] 206 eor x10,x20,x20,ror#23 207 and x17,x21,x20 208 bic x19,x22,x20 209 add x23,x23,x7 // h+=X[i] 210 orr x17,x17,x19 // Ch(e,f,g) 211 eor x19,x24,x25 // a^b, b^c in next round 212 eor x16,x16,x10,ror#18 // Sigma1(e) 213 ror x10,x24,#28 214 add x23,x23,x17 // h+=Ch(e,f,g) 215 eor x17,x24,x24,ror#5 216 add x23,x23,x16 // h+=Sigma1(e) 217 and x28,x28,x19 // (b^c)&=(a^b) 218 add x27,x27,x23 // d+=h 219 eor 
x28,x28,x25 // Maj(a,b,c) 220 eor x17,x10,x17,ror#34 // Sigma0(a) 221 add x23,x23,x28 // h+=Maj(a,b,c) 222 ldr x28,[x30],#8 // *K++, x19 in next round 223 //add x23,x23,x17 // h+=Sigma0(a) 224#ifndef __AARCH64EB__ 225 rev x8,x8 // 5 226#endif 227 ldp x9,x10,[x1],#2*8 228 add x23,x23,x17 // h+=Sigma0(a) 229 ror x16,x27,#14 230 add x22,x22,x28 // h+=K[i] 231 eor x11,x27,x27,ror#23 232 and x17,x20,x27 233 bic x28,x21,x27 234 add x22,x22,x8 // h+=X[i] 235 orr x17,x17,x28 // Ch(e,f,g) 236 eor x28,x23,x24 // a^b, b^c in next round 237 eor x16,x16,x11,ror#18 // Sigma1(e) 238 ror x11,x23,#28 239 add x22,x22,x17 // h+=Ch(e,f,g) 240 eor x17,x23,x23,ror#5 241 add x22,x22,x16 // h+=Sigma1(e) 242 and x19,x19,x28 // (b^c)&=(a^b) 243 add x26,x26,x22 // d+=h 244 eor x19,x19,x24 // Maj(a,b,c) 245 eor x17,x11,x17,ror#34 // Sigma0(a) 246 add x22,x22,x19 // h+=Maj(a,b,c) 247 ldr x19,[x30],#8 // *K++, x28 in next round 248 //add x22,x22,x17 // h+=Sigma0(a) 249#ifndef __AARCH64EB__ 250 rev x9,x9 // 6 251#endif 252 add x22,x22,x17 // h+=Sigma0(a) 253 ror x16,x26,#14 254 add x21,x21,x19 // h+=K[i] 255 eor x12,x26,x26,ror#23 256 and x17,x27,x26 257 bic x19,x20,x26 258 add x21,x21,x9 // h+=X[i] 259 orr x17,x17,x19 // Ch(e,f,g) 260 eor x19,x22,x23 // a^b, b^c in next round 261 eor x16,x16,x12,ror#18 // Sigma1(e) 262 ror x12,x22,#28 263 add x21,x21,x17 // h+=Ch(e,f,g) 264 eor x17,x22,x22,ror#5 265 add x21,x21,x16 // h+=Sigma1(e) 266 and x28,x28,x19 // (b^c)&=(a^b) 267 add x25,x25,x21 // d+=h 268 eor x28,x28,x23 // Maj(a,b,c) 269 eor x17,x12,x17,ror#34 // Sigma0(a) 270 add x21,x21,x28 // h+=Maj(a,b,c) 271 ldr x28,[x30],#8 // *K++, x19 in next round 272 //add x21,x21,x17 // h+=Sigma0(a) 273#ifndef __AARCH64EB__ 274 rev x10,x10 // 7 275#endif 276 ldp x11,x12,[x1],#2*8 277 add x21,x21,x17 // h+=Sigma0(a) 278 ror x16,x25,#14 279 add x20,x20,x28 // h+=K[i] 280 eor x13,x25,x25,ror#23 281 and x17,x26,x25 282 bic x28,x27,x25 283 add x20,x20,x10 // h+=X[i] 284 orr x17,x17,x28 // Ch(e,f,g) 285 eor 
x28,x21,x22 // a^b, b^c in next round 286 eor x16,x16,x13,ror#18 // Sigma1(e) 287 ror x13,x21,#28 288 add x20,x20,x17 // h+=Ch(e,f,g) 289 eor x17,x21,x21,ror#5 290 add x20,x20,x16 // h+=Sigma1(e) 291 and x19,x19,x28 // (b^c)&=(a^b) 292 add x24,x24,x20 // d+=h 293 eor x19,x19,x22 // Maj(a,b,c) 294 eor x17,x13,x17,ror#34 // Sigma0(a) 295 add x20,x20,x19 // h+=Maj(a,b,c) 296 ldr x19,[x30],#8 // *K++, x28 in next round 297 //add x20,x20,x17 // h+=Sigma0(a) 298#ifndef __AARCH64EB__ 299 rev x11,x11 // 8 300#endif 301 add x20,x20,x17 // h+=Sigma0(a) 302 ror x16,x24,#14 303 add x27,x27,x19 // h+=K[i] 304 eor x14,x24,x24,ror#23 305 and x17,x25,x24 306 bic x19,x26,x24 307 add x27,x27,x11 // h+=X[i] 308 orr x17,x17,x19 // Ch(e,f,g) 309 eor x19,x20,x21 // a^b, b^c in next round 310 eor x16,x16,x14,ror#18 // Sigma1(e) 311 ror x14,x20,#28 312 add x27,x27,x17 // h+=Ch(e,f,g) 313 eor x17,x20,x20,ror#5 314 add x27,x27,x16 // h+=Sigma1(e) 315 and x28,x28,x19 // (b^c)&=(a^b) 316 add x23,x23,x27 // d+=h 317 eor x28,x28,x21 // Maj(a,b,c) 318 eor x17,x14,x17,ror#34 // Sigma0(a) 319 add x27,x27,x28 // h+=Maj(a,b,c) 320 ldr x28,[x30],#8 // *K++, x19 in next round 321 //add x27,x27,x17 // h+=Sigma0(a) 322#ifndef __AARCH64EB__ 323 rev x12,x12 // 9 324#endif 325 ldp x13,x14,[x1],#2*8 326 add x27,x27,x17 // h+=Sigma0(a) 327 ror x16,x23,#14 328 add x26,x26,x28 // h+=K[i] 329 eor x15,x23,x23,ror#23 330 and x17,x24,x23 331 bic x28,x25,x23 332 add x26,x26,x12 // h+=X[i] 333 orr x17,x17,x28 // Ch(e,f,g) 334 eor x28,x27,x20 // a^b, b^c in next round 335 eor x16,x16,x15,ror#18 // Sigma1(e) 336 ror x15,x27,#28 337 add x26,x26,x17 // h+=Ch(e,f,g) 338 eor x17,x27,x27,ror#5 339 add x26,x26,x16 // h+=Sigma1(e) 340 and x19,x19,x28 // (b^c)&=(a^b) 341 add x22,x22,x26 // d+=h 342 eor x19,x19,x20 // Maj(a,b,c) 343 eor x17,x15,x17,ror#34 // Sigma0(a) 344 add x26,x26,x19 // h+=Maj(a,b,c) 345 ldr x19,[x30],#8 // *K++, x28 in next round 346 //add x26,x26,x17 // h+=Sigma0(a) 347#ifndef __AARCH64EB__ 348 rev 
x13,x13 // 10 349#endif 350 add x26,x26,x17 // h+=Sigma0(a) 351 ror x16,x22,#14 352 add x25,x25,x19 // h+=K[i] 353 eor x0,x22,x22,ror#23 354 and x17,x23,x22 355 bic x19,x24,x22 356 add x25,x25,x13 // h+=X[i] 357 orr x17,x17,x19 // Ch(e,f,g) 358 eor x19,x26,x27 // a^b, b^c in next round 359 eor x16,x16,x0,ror#18 // Sigma1(e) 360 ror x0,x26,#28 361 add x25,x25,x17 // h+=Ch(e,f,g) 362 eor x17,x26,x26,ror#5 363 add x25,x25,x16 // h+=Sigma1(e) 364 and x28,x28,x19 // (b^c)&=(a^b) 365 add x21,x21,x25 // d+=h 366 eor x28,x28,x27 // Maj(a,b,c) 367 eor x17,x0,x17,ror#34 // Sigma0(a) 368 add x25,x25,x28 // h+=Maj(a,b,c) 369 ldr x28,[x30],#8 // *K++, x19 in next round 370 //add x25,x25,x17 // h+=Sigma0(a) 371#ifndef __AARCH64EB__ 372 rev x14,x14 // 11 373#endif 374 ldp x15,x0,[x1],#2*8 375 add x25,x25,x17 // h+=Sigma0(a) 376 str x6,[sp,#24] 377 ror x16,x21,#14 378 add x24,x24,x28 // h+=K[i] 379 eor x6,x21,x21,ror#23 380 and x17,x22,x21 381 bic x28,x23,x21 382 add x24,x24,x14 // h+=X[i] 383 orr x17,x17,x28 // Ch(e,f,g) 384 eor x28,x25,x26 // a^b, b^c in next round 385 eor x16,x16,x6,ror#18 // Sigma1(e) 386 ror x6,x25,#28 387 add x24,x24,x17 // h+=Ch(e,f,g) 388 eor x17,x25,x25,ror#5 389 add x24,x24,x16 // h+=Sigma1(e) 390 and x19,x19,x28 // (b^c)&=(a^b) 391 add x20,x20,x24 // d+=h 392 eor x19,x19,x26 // Maj(a,b,c) 393 eor x17,x6,x17,ror#34 // Sigma0(a) 394 add x24,x24,x19 // h+=Maj(a,b,c) 395 ldr x19,[x30],#8 // *K++, x28 in next round 396 //add x24,x24,x17 // h+=Sigma0(a) 397#ifndef __AARCH64EB__ 398 rev x15,x15 // 12 399#endif 400 add x24,x24,x17 // h+=Sigma0(a) 401 str x7,[sp,#0] 402 ror x16,x20,#14 403 add x23,x23,x19 // h+=K[i] 404 eor x7,x20,x20,ror#23 405 and x17,x21,x20 406 bic x19,x22,x20 407 add x23,x23,x15 // h+=X[i] 408 orr x17,x17,x19 // Ch(e,f,g) 409 eor x19,x24,x25 // a^b, b^c in next round 410 eor x16,x16,x7,ror#18 // Sigma1(e) 411 ror x7,x24,#28 412 add x23,x23,x17 // h+=Ch(e,f,g) 413 eor x17,x24,x24,ror#5 414 add x23,x23,x16 // h+=Sigma1(e) 415 and x28,x28,x19 
// (b^c)&=(a^b) 416 add x27,x27,x23 // d+=h 417 eor x28,x28,x25 // Maj(a,b,c) 418 eor x17,x7,x17,ror#34 // Sigma0(a) 419 add x23,x23,x28 // h+=Maj(a,b,c) 420 ldr x28,[x30],#8 // *K++, x19 in next round 421 //add x23,x23,x17 // h+=Sigma0(a) 422#ifndef __AARCH64EB__ 423 rev x0,x0 // 13 424#endif 425 ldp x1,x2,[x1] 426 add x23,x23,x17 // h+=Sigma0(a) 427 str x8,[sp,#8] 428 ror x16,x27,#14 429 add x22,x22,x28 // h+=K[i] 430 eor x8,x27,x27,ror#23 431 and x17,x20,x27 432 bic x28,x21,x27 433 add x22,x22,x0 // h+=X[i] 434 orr x17,x17,x28 // Ch(e,f,g) 435 eor x28,x23,x24 // a^b, b^c in next round 436 eor x16,x16,x8,ror#18 // Sigma1(e) 437 ror x8,x23,#28 438 add x22,x22,x17 // h+=Ch(e,f,g) 439 eor x17,x23,x23,ror#5 440 add x22,x22,x16 // h+=Sigma1(e) 441 and x19,x19,x28 // (b^c)&=(a^b) 442 add x26,x26,x22 // d+=h 443 eor x19,x19,x24 // Maj(a,b,c) 444 eor x17,x8,x17,ror#34 // Sigma0(a) 445 add x22,x22,x19 // h+=Maj(a,b,c) 446 ldr x19,[x30],#8 // *K++, x28 in next round 447 //add x22,x22,x17 // h+=Sigma0(a) 448#ifndef __AARCH64EB__ 449 rev x1,x1 // 14 450#endif 451 ldr x6,[sp,#24] 452 add x22,x22,x17 // h+=Sigma0(a) 453 str x9,[sp,#16] 454 ror x16,x26,#14 455 add x21,x21,x19 // h+=K[i] 456 eor x9,x26,x26,ror#23 457 and x17,x27,x26 458 bic x19,x20,x26 459 add x21,x21,x1 // h+=X[i] 460 orr x17,x17,x19 // Ch(e,f,g) 461 eor x19,x22,x23 // a^b, b^c in next round 462 eor x16,x16,x9,ror#18 // Sigma1(e) 463 ror x9,x22,#28 464 add x21,x21,x17 // h+=Ch(e,f,g) 465 eor x17,x22,x22,ror#5 466 add x21,x21,x16 // h+=Sigma1(e) 467 and x28,x28,x19 // (b^c)&=(a^b) 468 add x25,x25,x21 // d+=h 469 eor x28,x28,x23 // Maj(a,b,c) 470 eor x17,x9,x17,ror#34 // Sigma0(a) 471 add x21,x21,x28 // h+=Maj(a,b,c) 472 ldr x28,[x30],#8 // *K++, x19 in next round 473 //add x21,x21,x17 // h+=Sigma0(a) 474#ifndef __AARCH64EB__ 475 rev x2,x2 // 15 476#endif 477 ldr x7,[sp,#0] 478 add x21,x21,x17 // h+=Sigma0(a) 479 str x10,[sp,#24] 480 ror x16,x25,#14 481 add x20,x20,x28 // h+=K[i] 482 ror x9,x4,#1 483 and 
x17,x26,x25 484 ror x8,x1,#19 485 bic x28,x27,x25 486 ror x10,x21,#28 487 add x20,x20,x2 // h+=X[i] 488 eor x16,x16,x25,ror#18 489 eor x9,x9,x4,ror#8 490 orr x17,x17,x28 // Ch(e,f,g) 491 eor x28,x21,x22 // a^b, b^c in next round 492 eor x16,x16,x25,ror#41 // Sigma1(e) 493 eor x10,x10,x21,ror#34 494 add x20,x20,x17 // h+=Ch(e,f,g) 495 and x19,x19,x28 // (b^c)&=(a^b) 496 eor x8,x8,x1,ror#61 497 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 498 add x20,x20,x16 // h+=Sigma1(e) 499 eor x19,x19,x22 // Maj(a,b,c) 500 eor x17,x10,x21,ror#39 // Sigma0(a) 501 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 502 add x3,x3,x12 503 add x24,x24,x20 // d+=h 504 add x20,x20,x19 // h+=Maj(a,b,c) 505 ldr x19,[x30],#8 // *K++, x28 in next round 506 add x3,x3,x9 507 add x20,x20,x17 // h+=Sigma0(a) 508 add x3,x3,x8 509.Loop_16_xx: 510 ldr x8,[sp,#8] 511 str x11,[sp,#0] 512 ror x16,x24,#14 513 add x27,x27,x19 // h+=K[i] 514 ror x10,x5,#1 515 and x17,x25,x24 516 ror x9,x2,#19 517 bic x19,x26,x24 518 ror x11,x20,#28 519 add x27,x27,x3 // h+=X[i] 520 eor x16,x16,x24,ror#18 521 eor x10,x10,x5,ror#8 522 orr x17,x17,x19 // Ch(e,f,g) 523 eor x19,x20,x21 // a^b, b^c in next round 524 eor x16,x16,x24,ror#41 // Sigma1(e) 525 eor x11,x11,x20,ror#34 526 add x27,x27,x17 // h+=Ch(e,f,g) 527 and x28,x28,x19 // (b^c)&=(a^b) 528 eor x9,x9,x2,ror#61 529 eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) 530 add x27,x27,x16 // h+=Sigma1(e) 531 eor x28,x28,x21 // Maj(a,b,c) 532 eor x17,x11,x20,ror#39 // Sigma0(a) 533 eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) 534 add x4,x4,x13 535 add x23,x23,x27 // d+=h 536 add x27,x27,x28 // h+=Maj(a,b,c) 537 ldr x28,[x30],#8 // *K++, x19 in next round 538 add x4,x4,x10 539 add x27,x27,x17 // h+=Sigma0(a) 540 add x4,x4,x9 541 ldr x9,[sp,#16] 542 str x12,[sp,#8] 543 ror x16,x23,#14 544 add x26,x26,x28 // h+=K[i] 545 ror x11,x6,#1 546 and x17,x24,x23 547 ror x10,x3,#19 548 bic x28,x25,x23 549 ror x12,x27,#28 550 add x26,x26,x4 // h+=X[i] 551 eor x16,x16,x23,ror#18 552 eor x11,x11,x6,ror#8 553 orr x17,x17,x28 
// Ch(e,f,g) 554 eor x28,x27,x20 // a^b, b^c in next round 555 eor x16,x16,x23,ror#41 // Sigma1(e) 556 eor x12,x12,x27,ror#34 557 add x26,x26,x17 // h+=Ch(e,f,g) 558 and x19,x19,x28 // (b^c)&=(a^b) 559 eor x10,x10,x3,ror#61 560 eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) 561 add x26,x26,x16 // h+=Sigma1(e) 562 eor x19,x19,x20 // Maj(a,b,c) 563 eor x17,x12,x27,ror#39 // Sigma0(a) 564 eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) 565 add x5,x5,x14 566 add x22,x22,x26 // d+=h 567 add x26,x26,x19 // h+=Maj(a,b,c) 568 ldr x19,[x30],#8 // *K++, x28 in next round 569 add x5,x5,x11 570 add x26,x26,x17 // h+=Sigma0(a) 571 add x5,x5,x10 572 ldr x10,[sp,#24] 573 str x13,[sp,#16] 574 ror x16,x22,#14 575 add x25,x25,x19 // h+=K[i] 576 ror x12,x7,#1 577 and x17,x23,x22 578 ror x11,x4,#19 579 bic x19,x24,x22 580 ror x13,x26,#28 581 add x25,x25,x5 // h+=X[i] 582 eor x16,x16,x22,ror#18 583 eor x12,x12,x7,ror#8 584 orr x17,x17,x19 // Ch(e,f,g) 585 eor x19,x26,x27 // a^b, b^c in next round 586 eor x16,x16,x22,ror#41 // Sigma1(e) 587 eor x13,x13,x26,ror#34 588 add x25,x25,x17 // h+=Ch(e,f,g) 589 and x28,x28,x19 // (b^c)&=(a^b) 590 eor x11,x11,x4,ror#61 591 eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) 592 add x25,x25,x16 // h+=Sigma1(e) 593 eor x28,x28,x27 // Maj(a,b,c) 594 eor x17,x13,x26,ror#39 // Sigma0(a) 595 eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) 596 add x6,x6,x15 597 add x21,x21,x25 // d+=h 598 add x25,x25,x28 // h+=Maj(a,b,c) 599 ldr x28,[x30],#8 // *K++, x19 in next round 600 add x6,x6,x12 601 add x25,x25,x17 // h+=Sigma0(a) 602 add x6,x6,x11 603 ldr x11,[sp,#0] 604 str x14,[sp,#24] 605 ror x16,x21,#14 606 add x24,x24,x28 // h+=K[i] 607 ror x13,x8,#1 608 and x17,x22,x21 609 ror x12,x5,#19 610 bic x28,x23,x21 611 ror x14,x25,#28 612 add x24,x24,x6 // h+=X[i] 613 eor x16,x16,x21,ror#18 614 eor x13,x13,x8,ror#8 615 orr x17,x17,x28 // Ch(e,f,g) 616 eor x28,x25,x26 // a^b, b^c in next round 617 eor x16,x16,x21,ror#41 // Sigma1(e) 618 eor x14,x14,x25,ror#34 619 add x24,x24,x17 // h+=Ch(e,f,g) 620 
and x19,x19,x28 // (b^c)&=(a^b) 621 eor x12,x12,x5,ror#61 622 eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) 623 add x24,x24,x16 // h+=Sigma1(e) 624 eor x19,x19,x26 // Maj(a,b,c) 625 eor x17,x14,x25,ror#39 // Sigma0(a) 626 eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) 627 add x7,x7,x0 628 add x20,x20,x24 // d+=h 629 add x24,x24,x19 // h+=Maj(a,b,c) 630 ldr x19,[x30],#8 // *K++, x28 in next round 631 add x7,x7,x13 632 add x24,x24,x17 // h+=Sigma0(a) 633 add x7,x7,x12 634 ldr x12,[sp,#8] 635 str x15,[sp,#0] 636 ror x16,x20,#14 637 add x23,x23,x19 // h+=K[i] 638 ror x14,x9,#1 639 and x17,x21,x20 640 ror x13,x6,#19 641 bic x19,x22,x20 642 ror x15,x24,#28 643 add x23,x23,x7 // h+=X[i] 644 eor x16,x16,x20,ror#18 645 eor x14,x14,x9,ror#8 646 orr x17,x17,x19 // Ch(e,f,g) 647 eor x19,x24,x25 // a^b, b^c in next round 648 eor x16,x16,x20,ror#41 // Sigma1(e) 649 eor x15,x15,x24,ror#34 650 add x23,x23,x17 // h+=Ch(e,f,g) 651 and x28,x28,x19 // (b^c)&=(a^b) 652 eor x13,x13,x6,ror#61 653 eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) 654 add x23,x23,x16 // h+=Sigma1(e) 655 eor x28,x28,x25 // Maj(a,b,c) 656 eor x17,x15,x24,ror#39 // Sigma0(a) 657 eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) 658 add x8,x8,x1 659 add x27,x27,x23 // d+=h 660 add x23,x23,x28 // h+=Maj(a,b,c) 661 ldr x28,[x30],#8 // *K++, x19 in next round 662 add x8,x8,x14 663 add x23,x23,x17 // h+=Sigma0(a) 664 add x8,x8,x13 665 ldr x13,[sp,#16] 666 str x0,[sp,#8] 667 ror x16,x27,#14 668 add x22,x22,x28 // h+=K[i] 669 ror x15,x10,#1 670 and x17,x20,x27 671 ror x14,x7,#19 672 bic x28,x21,x27 673 ror x0,x23,#28 674 add x22,x22,x8 // h+=X[i] 675 eor x16,x16,x27,ror#18 676 eor x15,x15,x10,ror#8 677 orr x17,x17,x28 // Ch(e,f,g) 678 eor x28,x23,x24 // a^b, b^c in next round 679 eor x16,x16,x27,ror#41 // Sigma1(e) 680 eor x0,x0,x23,ror#34 681 add x22,x22,x17 // h+=Ch(e,f,g) 682 and x19,x19,x28 // (b^c)&=(a^b) 683 eor x14,x14,x7,ror#61 684 eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) 685 add x22,x22,x16 // h+=Sigma1(e) 686 eor x19,x19,x24 // Maj(a,b,c) 
687 eor x17,x0,x23,ror#39 // Sigma0(a) 688 eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) 689 add x9,x9,x2 690 add x26,x26,x22 // d+=h 691 add x22,x22,x19 // h+=Maj(a,b,c) 692 ldr x19,[x30],#8 // *K++, x28 in next round 693 add x9,x9,x15 694 add x22,x22,x17 // h+=Sigma0(a) 695 add x9,x9,x14 696 ldr x14,[sp,#24] 697 str x1,[sp,#16] 698 ror x16,x26,#14 699 add x21,x21,x19 // h+=K[i] 700 ror x0,x11,#1 701 and x17,x27,x26 702 ror x15,x8,#19 703 bic x19,x20,x26 704 ror x1,x22,#28 705 add x21,x21,x9 // h+=X[i] 706 eor x16,x16,x26,ror#18 707 eor x0,x0,x11,ror#8 708 orr x17,x17,x19 // Ch(e,f,g) 709 eor x19,x22,x23 // a^b, b^c in next round 710 eor x16,x16,x26,ror#41 // Sigma1(e) 711 eor x1,x1,x22,ror#34 712 add x21,x21,x17 // h+=Ch(e,f,g) 713 and x28,x28,x19 // (b^c)&=(a^b) 714 eor x15,x15,x8,ror#61 715 eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) 716 add x21,x21,x16 // h+=Sigma1(e) 717 eor x28,x28,x23 // Maj(a,b,c) 718 eor x17,x1,x22,ror#39 // Sigma0(a) 719 eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) 720 add x10,x10,x3 721 add x25,x25,x21 // d+=h 722 add x21,x21,x28 // h+=Maj(a,b,c) 723 ldr x28,[x30],#8 // *K++, x19 in next round 724 add x10,x10,x0 725 add x21,x21,x17 // h+=Sigma0(a) 726 add x10,x10,x15 727 ldr x15,[sp,#0] 728 str x2,[sp,#24] 729 ror x16,x25,#14 730 add x20,x20,x28 // h+=K[i] 731 ror x1,x12,#1 732 and x17,x26,x25 733 ror x0,x9,#19 734 bic x28,x27,x25 735 ror x2,x21,#28 736 add x20,x20,x10 // h+=X[i] 737 eor x16,x16,x25,ror#18 738 eor x1,x1,x12,ror#8 739 orr x17,x17,x28 // Ch(e,f,g) 740 eor x28,x21,x22 // a^b, b^c in next round 741 eor x16,x16,x25,ror#41 // Sigma1(e) 742 eor x2,x2,x21,ror#34 743 add x20,x20,x17 // h+=Ch(e,f,g) 744 and x19,x19,x28 // (b^c)&=(a^b) 745 eor x0,x0,x9,ror#61 746 eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) 747 add x20,x20,x16 // h+=Sigma1(e) 748 eor x19,x19,x22 // Maj(a,b,c) 749 eor x17,x2,x21,ror#39 // Sigma0(a) 750 eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) 751 add x11,x11,x4 752 add x24,x24,x20 // d+=h 753 add x20,x20,x19 // h+=Maj(a,b,c) 754 ldr 
x19,[x30],#8 // *K++, x28 in next round 755 add x11,x11,x1 756 add x20,x20,x17 // h+=Sigma0(a) 757 add x11,x11,x0 758 ldr x0,[sp,#8] 759 str x3,[sp,#0] 760 ror x16,x24,#14 761 add x27,x27,x19 // h+=K[i] 762 ror x2,x13,#1 763 and x17,x25,x24 764 ror x1,x10,#19 765 bic x19,x26,x24 766 ror x3,x20,#28 767 add x27,x27,x11 // h+=X[i] 768 eor x16,x16,x24,ror#18 769 eor x2,x2,x13,ror#8 770 orr x17,x17,x19 // Ch(e,f,g) 771 eor x19,x20,x21 // a^b, b^c in next round 772 eor x16,x16,x24,ror#41 // Sigma1(e) 773 eor x3,x3,x20,ror#34 774 add x27,x27,x17 // h+=Ch(e,f,g) 775 and x28,x28,x19 // (b^c)&=(a^b) 776 eor x1,x1,x10,ror#61 777 eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) 778 add x27,x27,x16 // h+=Sigma1(e) 779 eor x28,x28,x21 // Maj(a,b,c) 780 eor x17,x3,x20,ror#39 // Sigma0(a) 781 eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) 782 add x12,x12,x5 783 add x23,x23,x27 // d+=h 784 add x27,x27,x28 // h+=Maj(a,b,c) 785 ldr x28,[x30],#8 // *K++, x19 in next round 786 add x12,x12,x2 787 add x27,x27,x17 // h+=Sigma0(a) 788 add x12,x12,x1 789 ldr x1,[sp,#16] 790 str x4,[sp,#8] 791 ror x16,x23,#14 792 add x26,x26,x28 // h+=K[i] 793 ror x3,x14,#1 794 and x17,x24,x23 795 ror x2,x11,#19 796 bic x28,x25,x23 797 ror x4,x27,#28 798 add x26,x26,x12 // h+=X[i] 799 eor x16,x16,x23,ror#18 800 eor x3,x3,x14,ror#8 801 orr x17,x17,x28 // Ch(e,f,g) 802 eor x28,x27,x20 // a^b, b^c in next round 803 eor x16,x16,x23,ror#41 // Sigma1(e) 804 eor x4,x4,x27,ror#34 805 add x26,x26,x17 // h+=Ch(e,f,g) 806 and x19,x19,x28 // (b^c)&=(a^b) 807 eor x2,x2,x11,ror#61 808 eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) 809 add x26,x26,x16 // h+=Sigma1(e) 810 eor x19,x19,x20 // Maj(a,b,c) 811 eor x17,x4,x27,ror#39 // Sigma0(a) 812 eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) 813 add x13,x13,x6 814 add x22,x22,x26 // d+=h 815 add x26,x26,x19 // h+=Maj(a,b,c) 816 ldr x19,[x30],#8 // *K++, x28 in next round 817 add x13,x13,x3 818 add x26,x26,x17 // h+=Sigma0(a) 819 add x13,x13,x2 820 ldr x2,[sp,#24] 821 str x5,[sp,#16] 822 ror x16,x22,#14 823 add 
x25,x25,x19 // h+=K[i] 824 ror x4,x15,#1 825 and x17,x23,x22 826 ror x3,x12,#19 827 bic x19,x24,x22 828 ror x5,x26,#28 829 add x25,x25,x13 // h+=X[i] 830 eor x16,x16,x22,ror#18 831 eor x4,x4,x15,ror#8 832 orr x17,x17,x19 // Ch(e,f,g) 833 eor x19,x26,x27 // a^b, b^c in next round 834 eor x16,x16,x22,ror#41 // Sigma1(e) 835 eor x5,x5,x26,ror#34 836 add x25,x25,x17 // h+=Ch(e,f,g) 837 and x28,x28,x19 // (b^c)&=(a^b) 838 eor x3,x3,x12,ror#61 839 eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) 840 add x25,x25,x16 // h+=Sigma1(e) 841 eor x28,x28,x27 // Maj(a,b,c) 842 eor x17,x5,x26,ror#39 // Sigma0(a) 843 eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) 844 add x14,x14,x7 845 add x21,x21,x25 // d+=h 846 add x25,x25,x28 // h+=Maj(a,b,c) 847 ldr x28,[x30],#8 // *K++, x19 in next round 848 add x14,x14,x4 849 add x25,x25,x17 // h+=Sigma0(a) 850 add x14,x14,x3 851 ldr x3,[sp,#0] 852 str x6,[sp,#24] 853 ror x16,x21,#14 854 add x24,x24,x28 // h+=K[i] 855 ror x5,x0,#1 856 and x17,x22,x21 857 ror x4,x13,#19 858 bic x28,x23,x21 859 ror x6,x25,#28 860 add x24,x24,x14 // h+=X[i] 861 eor x16,x16,x21,ror#18 862 eor x5,x5,x0,ror#8 863 orr x17,x17,x28 // Ch(e,f,g) 864 eor x28,x25,x26 // a^b, b^c in next round 865 eor x16,x16,x21,ror#41 // Sigma1(e) 866 eor x6,x6,x25,ror#34 867 add x24,x24,x17 // h+=Ch(e,f,g) 868 and x19,x19,x28 // (b^c)&=(a^b) 869 eor x4,x4,x13,ror#61 870 eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) 871 add x24,x24,x16 // h+=Sigma1(e) 872 eor x19,x19,x26 // Maj(a,b,c) 873 eor x17,x6,x25,ror#39 // Sigma0(a) 874 eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) 875 add x15,x15,x8 876 add x20,x20,x24 // d+=h 877 add x24,x24,x19 // h+=Maj(a,b,c) 878 ldr x19,[x30],#8 // *K++, x28 in next round 879 add x15,x15,x5 880 add x24,x24,x17 // h+=Sigma0(a) 881 add x15,x15,x4 882 ldr x4,[sp,#8] 883 str x7,[sp,#0] 884 ror x16,x20,#14 885 add x23,x23,x19 // h+=K[i] 886 ror x6,x1,#1 887 and x17,x21,x20 888 ror x5,x14,#19 889 bic x19,x22,x20 890 ror x7,x24,#28 891 add x23,x23,x15 // h+=X[i] 892 eor x16,x16,x20,ror#18 893 eor 
x6,x6,x1,ror#8 894 orr x17,x17,x19 // Ch(e,f,g) 895 eor x19,x24,x25 // a^b, b^c in next round 896 eor x16,x16,x20,ror#41 // Sigma1(e) 897 eor x7,x7,x24,ror#34 898 add x23,x23,x17 // h+=Ch(e,f,g) 899 and x28,x28,x19 // (b^c)&=(a^b) 900 eor x5,x5,x14,ror#61 901 eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) 902 add x23,x23,x16 // h+=Sigma1(e) 903 eor x28,x28,x25 // Maj(a,b,c) 904 eor x17,x7,x24,ror#39 // Sigma0(a) 905 eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) 906 add x0,x0,x9 907 add x27,x27,x23 // d+=h 908 add x23,x23,x28 // h+=Maj(a,b,c) 909 ldr x28,[x30],#8 // *K++, x19 in next round 910 add x0,x0,x6 911 add x23,x23,x17 // h+=Sigma0(a) 912 add x0,x0,x5 913 ldr x5,[sp,#16] 914 str x8,[sp,#8] 915 ror x16,x27,#14 916 add x22,x22,x28 // h+=K[i] 917 ror x7,x2,#1 918 and x17,x20,x27 919 ror x6,x15,#19 920 bic x28,x21,x27 921 ror x8,x23,#28 922 add x22,x22,x0 // h+=X[i] 923 eor x16,x16,x27,ror#18 924 eor x7,x7,x2,ror#8 925 orr x17,x17,x28 // Ch(e,f,g) 926 eor x28,x23,x24 // a^b, b^c in next round 927 eor x16,x16,x27,ror#41 // Sigma1(e) 928 eor x8,x8,x23,ror#34 929 add x22,x22,x17 // h+=Ch(e,f,g) 930 and x19,x19,x28 // (b^c)&=(a^b) 931 eor x6,x6,x15,ror#61 932 eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) 933 add x22,x22,x16 // h+=Sigma1(e) 934 eor x19,x19,x24 // Maj(a,b,c) 935 eor x17,x8,x23,ror#39 // Sigma0(a) 936 eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) 937 add x1,x1,x10 938 add x26,x26,x22 // d+=h 939 add x22,x22,x19 // h+=Maj(a,b,c) 940 ldr x19,[x30],#8 // *K++, x28 in next round 941 add x1,x1,x7 942 add x22,x22,x17 // h+=Sigma0(a) 943 add x1,x1,x6 944 ldr x6,[sp,#24] 945 str x9,[sp,#16] 946 ror x16,x26,#14 947 add x21,x21,x19 // h+=K[i] 948 ror x8,x3,#1 949 and x17,x27,x26 950 ror x7,x0,#19 951 bic x19,x20,x26 952 ror x9,x22,#28 953 add x21,x21,x1 // h+=X[i] 954 eor x16,x16,x26,ror#18 955 eor x8,x8,x3,ror#8 956 orr x17,x17,x19 // Ch(e,f,g) 957 eor x19,x22,x23 // a^b, b^c in next round 958 eor x16,x16,x26,ror#41 // Sigma1(e) 959 eor x9,x9,x22,ror#34 960 add x21,x21,x17 // h+=Ch(e,f,g) 961 
and x28,x28,x19 // (b^c)&=(a^b) 962 eor x7,x7,x0,ror#61 963 eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) 964 add x21,x21,x16 // h+=Sigma1(e) 965 eor x28,x28,x23 // Maj(a,b,c) 966 eor x17,x9,x22,ror#39 // Sigma0(a) 967 eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) 968 add x2,x2,x11 969 add x25,x25,x21 // d+=h 970 add x21,x21,x28 // h+=Maj(a,b,c) 971 ldr x28,[x30],#8 // *K++, x19 in next round 972 add x2,x2,x8 973 add x21,x21,x17 // h+=Sigma0(a) 974 add x2,x2,x7 975 ldr x7,[sp,#0] 976 str x10,[sp,#24] 977 ror x16,x25,#14 978 add x20,x20,x28 // h+=K[i] 979 ror x9,x4,#1 980 and x17,x26,x25 981 ror x8,x1,#19 982 bic x28,x27,x25 983 ror x10,x21,#28 984 add x20,x20,x2 // h+=X[i] 985 eor x16,x16,x25,ror#18 986 eor x9,x9,x4,ror#8 987 orr x17,x17,x28 // Ch(e,f,g) 988 eor x28,x21,x22 // a^b, b^c in next round 989 eor x16,x16,x25,ror#41 // Sigma1(e) 990 eor x10,x10,x21,ror#34 991 add x20,x20,x17 // h+=Ch(e,f,g) 992 and x19,x19,x28 // (b^c)&=(a^b) 993 eor x8,x8,x1,ror#61 994 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 995 add x20,x20,x16 // h+=Sigma1(e) 996 eor x19,x19,x22 // Maj(a,b,c) 997 eor x17,x10,x21,ror#39 // Sigma0(a) 998 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 999 add x3,x3,x12 1000 add x24,x24,x20 // d+=h 1001 add x20,x20,x19 // h+=Maj(a,b,c) 1002 ldr x19,[x30],#8 // *K++, x28 in next round 1003 add x3,x3,x9 1004 add x20,x20,x17 // h+=Sigma0(a) 1005 add x3,x3,x8 1006 cbnz x19,.Loop_16_xx 1007 1008 ldp x0,x2,[x29,#96] 1009 ldr x1,[x29,#112] 1010 sub x30,x30,#648 // rewind 1011 1012 ldp x3,x4,[x0] 1013 ldp x5,x6,[x0,#2*8] 1014 add x1,x1,#14*8 // advance input pointer 1015 ldp x7,x8,[x0,#4*8] 1016 add x20,x20,x3 1017 ldp x9,x10,[x0,#6*8] 1018 add x21,x21,x4 1019 add x22,x22,x5 1020 add x23,x23,x6 1021 stp x20,x21,[x0] 1022 add x24,x24,x7 1023 add x25,x25,x8 1024 stp x22,x23,[x0,#2*8] 1025 add x26,x26,x9 1026 add x27,x27,x10 1027 cmp x1,x2 1028 stp x24,x25,[x0,#4*8] 1029 stp x26,x27,[x0,#6*8] 1030 b.ne .Loop 1031 1032 ldp x19,x20,[x29,#16] 1033 add sp,sp,#4*8 1034 ldp x21,x22,[x29,#32] 1035 ldp 
x23,x24,[x29,#48] 1036 ldp x25,x26,[x29,#64] 1037 ldp x27,x28,[x29,#80] 1038 ldp x29,x30,[sp],#128 1039 AARCH64_VALIDATE_LINK_REGISTER 1040 ret 1041.size sha512_block_data_order,.-sha512_block_data_order 1042 1043.section .rodata 1044 1045.align 6 1046.type .LK512,%object 1047.LK512: 1048.quad 0x428a2f98d728ae22,0x7137449123ef65cd 1049.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc 1050.quad 0x3956c25bf348b538,0x59f111f1b605d019 1051.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 1052.quad 0xd807aa98a3030242,0x12835b0145706fbe 1053.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 1054.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 1055.quad 0x9bdc06a725c71235,0xc19bf174cf692694 1056.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 1057.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 1058.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 1059.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 1060.quad 0x983e5152ee66dfab,0xa831c66d2db43210 1061.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 1062.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 1063.quad 0x06ca6351e003826f,0x142929670a0e6e70 1064.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 1065.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df 1066.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 1067.quad 0x81c2c92e47edaee6,0x92722c851482353b 1068.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 1069.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 1070.quad 0xd192e819d6ef5218,0xd69906245565a910 1071.quad 0xf40e35855771202a,0x106aa07032bbd1b8 1072.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 1073.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 1074.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb 1075.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 1076.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 1077.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec 1078.quad 0x90befffa23631e28,0xa4506cebde82bde9 1079.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b 1080.quad 0xca273eceea26619c,0xd186b8c721c0c207 1081.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 1082.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 1083.quad 
0x113f9804bef90dae,0x1b710b35131c471b
.quad 0x28db77f523047d84,0x32caab7b40c72493
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad 0 // terminator
.size .LK512,.-.LK512
// ASCII: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2

.text
#ifndef __KERNEL__
// sha512_block_armv8 / .Lv8_entry
//
// Block transform written with the SHA-512 instructions. They are emitted as
// raw .inst words; the intended mnemonic is given in the comment on each word.
// sha512_block_data_order branches to .Lv8_entry when the SHA512 capability bit
// is set in OPENSSL_armcap_P (see the dispatch at the top of the file).
//
// Register use, as read from the code below:
//   x0       pointer to the eight 64-bit state words (loaded into / stored from v0-v3)
//   x1       input pointer, 128 bytes consumed per pass of .Loop_hw
//   x2       number of 128-byte blocks, decremented once per pass
//   x3       cursor into .LK512, rewound by 80*8 bytes every pass
//   x4       x1-128, selected into x1 on the final pass
//   v0-v4    working state (the roles rotate with a period of five groups)
//   v5-v7    ext temporaries feeding sha512h / sha512su1
//   v16-v23  message schedule (byte-reversed with rev64 after loading)
//   v24,v25  constants plus message words, used alternately
//   v26-v29  copy of the state at the top of the pass, added back at the end
// No other vector registers are touched.
//
// Each pass consists of 40 groups; every group consumes one 16-byte vector
// (two constants) from .LK512. Groups 0-31 also update the schedule with
// sha512su0/sha512su1; groups 32-39 instead reload v16-v23 from the input.
//
// NOTE(review): the first block is loaded and processed before x2 is tested,
// so a count of zero is not handled here; presumably callers guarantee x2 >= 1.
.type sha512_block_armv8,%function
.align 6
sha512_block_armv8:
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp x29,x30,[sp,#-16]!
	add x29,sp,#0

	ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input
	ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context
	adrp x3,.LK512
	add x3,x3,#:lo12:.LK512

	rev64 v16.16b,v16.16b
	rev64 v17.16b,v17.16b
	rev64 v18.16b,v18.16b
	rev64 v19.16b,v19.16b
	rev64 v20.16b,v20.16b
	rev64 v21.16b,v21.16b
	rev64 v22.16b,v22.16b
	rev64 v23.16b,v23.16b
	b .Loop_hw

.align 4
.Loop_hw:
	ld1 {v24.2d},[x3],#16 // K[0..1]; later vectors are fetched one group ahead
	subs x2,x2,#1 // flags are consumed by the csel below
	sub x4,x1,#128
	orr v26.16b,v0.16b,v0.16b // offload
	orr v27.16b,v1.16b,v1.16b
	orr v28.16b,v2.16b,v2.16b
	orr v29.16b,v3.16b,v3.16b
	// On the last pass (x2 reached zero) x1 is moved back by 128 bytes, so the
	// "load next input" loads in groups 32-39 re-read the block that is
	// already being processed instead of reading past it.
	csel x1,x1,x4,ne // conditional rewind

	// group 0: K[0..1], message pair v16
	add v24.2d,v24.2d,v16.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
	ext v7.16b,v20.16b,v21.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 1: K[2..3], message pair v17
	add v25.2d,v25.2d,v17.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
	ext v7.16b,v21.16b,v22.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 2: K[4..5], message pair v18
	add v24.2d,v24.2d,v18.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
	ext v7.16b,v22.16b,v23.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 3: K[6..7], message pair v19
	add v25.2d,v25.2d,v19.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
	ext v7.16b,v23.16b,v16.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 4: K[8..9], message pair v20
	add v24.2d,v24.2d,v20.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
	ext v7.16b,v16.16b,v17.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 5: K[10..11], message pair v21
	add v25.2d,v25.2d,v21.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
	ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 6: K[12..13], message pair v22
	add v24.2d,v24.2d,v22.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
	ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 7: K[14..15], message pair v23
	add v25.2d,v25.2d,v23.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
	ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 8: K[16..17], message pair v16
	add v24.2d,v24.2d,v16.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
	ext v7.16b,v20.16b,v21.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 9: K[18..19], message pair v17
	add v25.2d,v25.2d,v17.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
	ext v7.16b,v21.16b,v22.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 10: K[20..21], message pair v18
	add v24.2d,v24.2d,v18.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
	ext v7.16b,v22.16b,v23.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 11: K[22..23], message pair v19
	add v25.2d,v25.2d,v19.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
	ext v7.16b,v23.16b,v16.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 12: K[24..25], message pair v20
	add v24.2d,v24.2d,v20.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
	ext v7.16b,v16.16b,v17.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 13: K[26..27], message pair v21
	add v25.2d,v25.2d,v21.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
	ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 14: K[28..29], message pair v22
	add v24.2d,v24.2d,v22.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
	ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 15: K[30..31], message pair v23
	add v25.2d,v25.2d,v23.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
	ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 16: K[32..33], message pair v16
	add v24.2d,v24.2d,v16.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
	ext v7.16b,v20.16b,v21.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 17: K[34..35], message pair v17
	add v25.2d,v25.2d,v17.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
	ext v7.16b,v21.16b,v22.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 18: K[36..37], message pair v18
	add v24.2d,v24.2d,v18.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
	ext v7.16b,v22.16b,v23.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 19: K[38..39], message pair v19
	add v25.2d,v25.2d,v19.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
	ext v7.16b,v23.16b,v16.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 20: K[40..41], message pair v20
	add v24.2d,v24.2d,v20.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
	ext v7.16b,v16.16b,v17.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 21: K[42..43], message pair v21
	add v25.2d,v25.2d,v21.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
	ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 22: K[44..45], message pair v22
	add v24.2d,v24.2d,v22.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
	ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 23: K[46..47], message pair v23
	add v25.2d,v25.2d,v23.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
	ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 24: K[48..49], message pair v16
	add v24.2d,v24.2d,v16.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08230 //sha512su0 v16.16b,v17.16b
	ext v7.16b,v20.16b,v21.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 25: K[50..51], message pair v17
	add v25.2d,v25.2d,v17.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08251 //sha512su0 v17.16b,v18.16b
	ext v7.16b,v21.16b,v22.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 26: K[52..53], message pair v18
	add v24.2d,v24.2d,v18.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec08272 //sha512su0 v18.16b,v19.16b
	ext v7.16b,v22.16b,v23.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 27: K[54..55], message pair v19
	add v25.2d,v25.2d,v19.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08293 //sha512su0 v19.16b,v20.16b
	ext v7.16b,v23.16b,v16.16b,#8
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 28: K[56..57], message pair v20
	add v24.2d,v24.2d,v20.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b
	ext v7.16b,v16.16b,v17.16b,#8
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 29: K[58..59], message pair v21
	add v25.2d,v25.2d,v21.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b
	ext v7.16b,v17.16b,v18.16b,#8
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 30: K[60..61], message pair v22
	add v24.2d,v24.2d,v22.2d
	ld1 {v25.2d},[x3],#16
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b
	ext v7.16b,v18.16b,v19.16b,#8
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 31: K[62..63], message pair v23 (last group with a schedule update)
	add v25.2d,v25.2d,v23.2d
	ld1 {v24.2d},[x3],#16
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xcec08217 //sha512su0 v23.16b,v16.16b
	ext v7.16b,v19.16b,v20.16b,#8
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// Groups 32-39: no schedule update. Each message register is refilled
	// from [x1] as soon as its value has been added to the constants.

	// group 32: K[64..65], message pair v16
	ld1 {v25.2d},[x3],#16
	add v24.2d,v24.2d,v16.2d
	ld1 {v16.16b},[x1],#16 // load next input
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
	rev64 v16.16b,v16.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 33: K[66..67], message pair v17
	ld1 {v24.2d},[x3],#16
	add v25.2d,v25.2d,v17.2d
	ld1 {v17.16b},[x1],#16 // load next input
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
	rev64 v17.16b,v17.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 34: K[68..69], message pair v18
	ld1 {v25.2d},[x3],#16
	add v24.2d,v24.2d,v18.2d
	ld1 {v18.16b},[x1],#16 // load next input
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
	rev64 v18.16b,v18.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// group 35: K[70..71], message pair v19
	ld1 {v24.2d},[x3],#16
	add v25.2d,v25.2d,v19.2d
	ld1 {v19.16b},[x1],#16 // load next input
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v2.16b,v3.16b,#8
	ext v6.16b,v1.16b,v2.16b,#8
	add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b
	rev64 v19.16b,v19.16b
	add v4.2d,v1.2d,v3.2d // "D + T1"
.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b

	// group 36: K[72..73], message pair v20
	ld1 {v25.2d},[x3],#16
	add v24.2d,v24.2d,v20.2d
	ld1 {v20.16b},[x1],#16 // load next input
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v4.16b,v2.16b,#8
	ext v6.16b,v0.16b,v4.16b,#8
	add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b
	rev64 v20.16b,v20.16b
	add v1.2d,v0.2d,v2.2d // "D + T1"
.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b

	// group 37: K[74..75], message pair v21
	ld1 {v24.2d},[x3],#16
	add v25.2d,v25.2d,v21.2d
	ld1 {v21.16b},[x1],#16 // load next input
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v1.16b,v4.16b,#8
	ext v6.16b,v3.16b,v1.16b,#8
	add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b
	rev64 v21.16b,v21.16b
	add v0.2d,v3.2d,v4.2d // "D + T1"
.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b

	// group 38: K[76..77], message pair v22
	ld1 {v25.2d},[x3],#16
	add v24.2d,v24.2d,v22.2d
	ld1 {v22.16b},[x1],#16 // load next input
	ext v24.16b,v24.16b,v24.16b,#8
	ext v5.16b,v0.16b,v1.16b,#8
	ext v6.16b,v2.16b,v0.16b,#8
	add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]"
.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b
	rev64 v22.16b,v22.16b
	add v3.2d,v2.2d,v1.2d // "D + T1"
.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b

	// group 39: K[78..79], message pair v23; all 40 vectors have been fetched,
	// so x3 goes back to the start of .LK512 for the next pass
	sub x3,x3,#80*8 // rewind
	add v25.2d,v25.2d,v23.2d
	ld1 {v23.16b},[x1],#16 // load next input
	ext v25.16b,v25.16b,v25.16b,#8
	ext v5.16b,v3.16b,v0.16b,#8
	ext v6.16b,v4.16b,v3.16b,#8
	add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]"
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b
	rev64 v23.16b,v23.16b
	add v2.2d,v4.2d,v0.2d // "D + T1"
.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b

	// add the state saved in v26-v29 at the top of the pass
	add v0.2d,v0.2d,v26.2d // accumulate
	add v1.2d,v1.2d,v27.2d
	add v2.2d,v2.2d,v28.2d
	add v3.2d,v3.2d,v29.2d

	cbnz x2,.Loop_hw

	st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context

	// only x29 is reloaded; x30 is never written in this routine, so the
	// 16-byte frame is simply released
	ldr x29,[sp],#16
	ret
.size sha512_block_armv8,.-sha512_block_armv8
#endif
