/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//              SHA256-hw   SHA256(*)       SHA512
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
// ThunderX2    2.54        13.2 (+40%)     8.40 (+18%)
//
// (*)   Software SHA256 results are of lesser relevance, presented
//       mostly for informational purposes.
// (**)  The result is a trade-off: it's possible to improve it by
//       10% (or by 1 cycle per round), but at the cost of 20% loss
//       on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
//       an indication of some compiler "pathology", most notably code
//       generated with -mgeneral-regs-only is significantly faster
//       and the gap is only 40-90%.
//
// October 2016.
//
// Originally it was reckoned that it makes no sense to implement NEON
// version of SHA256 for 64-bit processors. This is because performance
// improvement on most wide-spread Cortex-A5x processors was observed
// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
// observed that 32-bit NEON SHA256 performs significantly better than
// 64-bit scalar version on *some* of the more recent processors. As a
// result 64-bit NEON version of SHA256 was added to provide best
// all-round performance. For example it executes ~30% faster on X-Gene
// and Mongoose. [For reference, NEON version of SHA512 is bound to
// deliver much less improvement, likely *negative* on Cortex-A5x.
// Which is why NEON support is limited to SHA256.]

// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#include "arm_arch.h"
#ifndef __KERNEL__

.hidden	OPENSSL_armcap_P
#endif

.text

// --------------------------------------------------------------------
// sha512_block_data_order -- scalar (general-purpose register) SHA-512
// block function.
//
// In:   x0 = pointer to eight 64-bit state words; loaded on entry and
//            updated in place after every 128-byte block
//       x1 = input pointer
//       x2 = number of 128-byte blocks (end of input = x1 + (x2 << 7))
// NOTE(review): presumably called from C as
//       void sha512_block_data_order(ctx, const void *in, size_t num);
//       the prototype is not visible in this file -- confirm at the caller.
// NOTE(review): the outer loop tests "input pointer == end" only after a
//       block has been processed, so a block count of 0 is not handled
//       here; callers presumably never pass 0 -- confirm.
//
// Dispatch: unless __KERNEL__ is defined, control transfers to .Lv8_entry
//       when the ARMV8_SHA512 bit is set in OPENSSL_armcap_P.
//
// Register roles in the scalar path:
//       x20-x27          working variables a..h (roles rotate every round)
//       x3-x15,x0,x1,x2  message schedule words X[0..15]
//       x19,x28          alternate between the current K[i] and the a^b
//                        value carried into the next round's Maj
//       x16,x17          scratch (Sigma1 / Ch / Sigma0)
//       x30              running pointer into .LK512
//       x29              frame pointer: [x29,#16..#95] = saved x19-x28,
//                        [x29,#96] = x0, [x29,#104] = end of input,
//                        [x29,#112] = input pointer (after the first ldp)
//       [sp,#0..#31]     spill area for four schedule words
// --------------------------------------------------------------------
.globl	sha512_block_data_order
.type	sha512_block_data_order,%function
.align	6
sha512_block_data_order:
	AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
	adrp	x16,OPENSSL_armcap_P
	ldr	w16,[x16,#:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	.Lv8_entry
#endif
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8	// 32-byte spill area for schedule words

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]	// save x0 and end of input; both registers are reused below

.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]	// save input pointer (already advanced by 16 bytes)
#ifndef __AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23	// x0 reused as a temporary; its entry value was saved at [x29,#96]
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]	// spill X[3]; x6 is used as a temporary below
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]	// spill X[4]; x7 is used as a temporary below
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]	// last two input words; overwrites x1/x2 (saved at [x29,#112] and [x29,#104])
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]	// spill X[5]; x8 is used as a temporary below
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]	// reload X[3]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]	// spill X[6]; x9 is used as a temporary below
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]	// reload X[4]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]	// spill X[7]; x10 is used as a temporary below
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
// Each pass of .Loop_16_xx performs 16 rounds. Every round also prepares
// the schedule word for the round after it, in place:
//   X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])   (indices mod 16)
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx	// x19 holds the K word just loaded; the zero terminator of .LK512 ends the loop

	ldp	x0,x2,[x29,#96]	// restore x0 and end-of-input saved before .Loop
	ldr	x1,[x29,#112]	// input pointer as saved after the first 16 bytes of this block
	sub	x30,x30,#648	// rewind: (80 constants + terminator) * 8 bytes

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	sha512_block_data_order,.-sha512_block_data_order

// .LK512: the 80 64-bit round constants K[0..79], followed by a zero
// quadword. The scalar path loads one entry per round and leaves
// .Loop_16_xx when it loads the zero (cbnz x19).
.align	6
.type	.LK512,%object
.LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
// ASCII: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#ifndef __KERNEL__
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]	// load context
	adr	x3,.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b	// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	csel	x1,x1,x4,ne	// conditional rewind
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
	add	v25.2d,v25.2d,v17.2d
	ld1
{v24.2d},[x3],#16 1139 ext v25.16b,v25.16b,v25.16b,#8 1140 ext v5.16b,v4.16b,v2.16b,#8 1141 ext v6.16b,v0.16b,v4.16b,#8 1142 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1143.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1144 ext v7.16b,v21.16b,v22.16b,#8 1145.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1146.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1147 add v1.2d,v0.2d,v2.2d // "D + T1" 1148.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1149 add v24.2d,v24.2d,v18.2d 1150 ld1 {v25.2d},[x3],#16 1151 ext v24.16b,v24.16b,v24.16b,#8 1152 ext v5.16b,v1.16b,v4.16b,#8 1153 ext v6.16b,v3.16b,v1.16b,#8 1154 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1155.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1156 ext v7.16b,v22.16b,v23.16b,#8 1157.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1158.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1159 add v0.2d,v3.2d,v4.2d // "D + T1" 1160.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1161 add v25.2d,v25.2d,v19.2d 1162 ld1 {v24.2d},[x3],#16 1163 ext v25.16b,v25.16b,v25.16b,#8 1164 ext v5.16b,v0.16b,v1.16b,#8 1165 ext v6.16b,v2.16b,v0.16b,#8 1166 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1167.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1168 ext v7.16b,v23.16b,v16.16b,#8 1169.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1170.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1171 add v3.2d,v2.2d,v1.2d // "D + T1" 1172.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1173 add v24.2d,v24.2d,v20.2d 1174 ld1 {v25.2d},[x3],#16 1175 ext v24.16b,v24.16b,v24.16b,#8 1176 ext v5.16b,v3.16b,v0.16b,#8 1177 ext v6.16b,v4.16b,v3.16b,#8 1178 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1179.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1180 ext v7.16b,v16.16b,v17.16b,#8 1181.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1182.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1183 add v2.2d,v4.2d,v0.2d // "D + T1" 1184.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1185 add v25.2d,v25.2d,v21.2d 1186 ld1 {v24.2d},[x3],#16 1187 ext 
v25.16b,v25.16b,v25.16b,#8 1188 ext v5.16b,v2.16b,v3.16b,#8 1189 ext v6.16b,v1.16b,v2.16b,#8 1190 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1191.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1192 ext v7.16b,v17.16b,v18.16b,#8 1193.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1194.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1195 add v4.2d,v1.2d,v3.2d // "D + T1" 1196.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1197 add v24.2d,v24.2d,v22.2d 1198 ld1 {v25.2d},[x3],#16 1199 ext v24.16b,v24.16b,v24.16b,#8 1200 ext v5.16b,v4.16b,v2.16b,#8 1201 ext v6.16b,v0.16b,v4.16b,#8 1202 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1203.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1204 ext v7.16b,v18.16b,v19.16b,#8 1205.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1206.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1207 add v1.2d,v0.2d,v2.2d // "D + T1" 1208.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1209 add v25.2d,v25.2d,v23.2d 1210 ld1 {v24.2d},[x3],#16 1211 ext v25.16b,v25.16b,v25.16b,#8 1212 ext v5.16b,v1.16b,v4.16b,#8 1213 ext v6.16b,v3.16b,v1.16b,#8 1214 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1215.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1216 ext v7.16b,v19.16b,v20.16b,#8 1217.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1218.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1219 add v0.2d,v3.2d,v4.2d // "D + T1" 1220.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1221 add v24.2d,v24.2d,v16.2d 1222 ld1 {v25.2d},[x3],#16 1223 ext v24.16b,v24.16b,v24.16b,#8 1224 ext v5.16b,v0.16b,v1.16b,#8 1225 ext v6.16b,v2.16b,v0.16b,#8 1226 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1227.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1228 ext v7.16b,v20.16b,v21.16b,#8 1229.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1230.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1231 add v3.2d,v2.2d,v1.2d // "D + T1" 1232.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1233 add v25.2d,v25.2d,v17.2d 1234 ld1 {v24.2d},[x3],#16 1235 ext v25.16b,v25.16b,v25.16b,#8 
1236 ext v5.16b,v3.16b,v0.16b,#8 1237 ext v6.16b,v4.16b,v3.16b,#8 1238 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1239.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1240 ext v7.16b,v21.16b,v22.16b,#8 1241.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1242.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1243 add v2.2d,v4.2d,v0.2d // "D + T1" 1244.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1245 add v24.2d,v24.2d,v18.2d 1246 ld1 {v25.2d},[x3],#16 1247 ext v24.16b,v24.16b,v24.16b,#8 1248 ext v5.16b,v2.16b,v3.16b,#8 1249 ext v6.16b,v1.16b,v2.16b,#8 1250 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1251.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1252 ext v7.16b,v22.16b,v23.16b,#8 1253.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1254.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1255 add v4.2d,v1.2d,v3.2d // "D + T1" 1256.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1257 add v25.2d,v25.2d,v19.2d 1258 ld1 {v24.2d},[x3],#16 1259 ext v25.16b,v25.16b,v25.16b,#8 1260 ext v5.16b,v4.16b,v2.16b,#8 1261 ext v6.16b,v0.16b,v4.16b,#8 1262 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1263.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1264 ext v7.16b,v23.16b,v16.16b,#8 1265.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1266.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1267 add v1.2d,v0.2d,v2.2d // "D + T1" 1268.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1269 add v24.2d,v24.2d,v20.2d 1270 ld1 {v25.2d},[x3],#16 1271 ext v24.16b,v24.16b,v24.16b,#8 1272 ext v5.16b,v1.16b,v4.16b,#8 1273 ext v6.16b,v3.16b,v1.16b,#8 1274 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1275.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1276 ext v7.16b,v16.16b,v17.16b,#8 1277.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1278.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1279 add v0.2d,v3.2d,v4.2d // "D + T1" 1280.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1281 add v25.2d,v25.2d,v21.2d 1282 ld1 {v24.2d},[x3],#16 1283 ext v25.16b,v25.16b,v25.16b,#8 1284 ext 
v5.16b,v0.16b,v1.16b,#8 1285 ext v6.16b,v2.16b,v0.16b,#8 1286 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1287.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1288 ext v7.16b,v17.16b,v18.16b,#8 1289.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1290.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1291 add v3.2d,v2.2d,v1.2d // "D + T1" 1292.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1293 add v24.2d,v24.2d,v22.2d 1294 ld1 {v25.2d},[x3],#16 1295 ext v24.16b,v24.16b,v24.16b,#8 1296 ext v5.16b,v3.16b,v0.16b,#8 1297 ext v6.16b,v4.16b,v3.16b,#8 1298 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1299.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1300 ext v7.16b,v18.16b,v19.16b,#8 1301.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1302.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1303 add v2.2d,v4.2d,v0.2d // "D + T1" 1304.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1305 add v25.2d,v25.2d,v23.2d 1306 ld1 {v24.2d},[x3],#16 1307 ext v25.16b,v25.16b,v25.16b,#8 1308 ext v5.16b,v2.16b,v3.16b,#8 1309 ext v6.16b,v1.16b,v2.16b,#8 1310 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1311.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1312 ext v7.16b,v19.16b,v20.16b,#8 1313.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1314.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1315 add v4.2d,v1.2d,v3.2d // "D + T1" 1316.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1317 add v24.2d,v24.2d,v16.2d 1318 ld1 {v25.2d},[x3],#16 1319 ext v24.16b,v24.16b,v24.16b,#8 1320 ext v5.16b,v4.16b,v2.16b,#8 1321 ext v6.16b,v0.16b,v4.16b,#8 1322 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1323.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1324 ext v7.16b,v20.16b,v21.16b,#8 1325.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1326.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1327 add v1.2d,v0.2d,v2.2d // "D + T1" 1328.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1329 add v25.2d,v25.2d,v17.2d 1330 ld1 {v24.2d},[x3],#16 1331 ext v25.16b,v25.16b,v25.16b,#8 1332 ext v5.16b,v1.16b,v4.16b,#8 
1333 ext v6.16b,v3.16b,v1.16b,#8 1334 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1335.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1336 ext v7.16b,v21.16b,v22.16b,#8 1337.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1338.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1339 add v0.2d,v3.2d,v4.2d // "D + T1" 1340.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1341 add v24.2d,v24.2d,v18.2d 1342 ld1 {v25.2d},[x3],#16 1343 ext v24.16b,v24.16b,v24.16b,#8 1344 ext v5.16b,v0.16b,v1.16b,#8 1345 ext v6.16b,v2.16b,v0.16b,#8 1346 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1347.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1348 ext v7.16b,v22.16b,v23.16b,#8 1349.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1350.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1351 add v3.2d,v2.2d,v1.2d // "D + T1" 1352.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1353 add v25.2d,v25.2d,v19.2d 1354 ld1 {v24.2d},[x3],#16 1355 ext v25.16b,v25.16b,v25.16b,#8 1356 ext v5.16b,v3.16b,v0.16b,#8 1357 ext v6.16b,v4.16b,v3.16b,#8 1358 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1359.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1360 ext v7.16b,v23.16b,v16.16b,#8 1361.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1362.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1363 add v2.2d,v4.2d,v0.2d // "D + T1" 1364.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1365 add v24.2d,v24.2d,v20.2d 1366 ld1 {v25.2d},[x3],#16 1367 ext v24.16b,v24.16b,v24.16b,#8 1368 ext v5.16b,v2.16b,v3.16b,#8 1369 ext v6.16b,v1.16b,v2.16b,#8 1370 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1371.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1372 ext v7.16b,v16.16b,v17.16b,#8 1373.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1374.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1375 add v4.2d,v1.2d,v3.2d // "D + T1" 1376.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1377 add v25.2d,v25.2d,v21.2d 1378 ld1 {v24.2d},[x3],#16 1379 ext v25.16b,v25.16b,v25.16b,#8 1380 ext v5.16b,v4.16b,v2.16b,#8 1381 ext 
v6.16b,v0.16b,v4.16b,#8 1382 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1383.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1384 ext v7.16b,v17.16b,v18.16b,#8 1385.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1386.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1387 add v1.2d,v0.2d,v2.2d // "D + T1" 1388.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1389 add v24.2d,v24.2d,v22.2d 1390 ld1 {v25.2d},[x3],#16 1391 ext v24.16b,v24.16b,v24.16b,#8 1392 ext v5.16b,v1.16b,v4.16b,#8 1393 ext v6.16b,v3.16b,v1.16b,#8 1394 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1395.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1396 ext v7.16b,v18.16b,v19.16b,#8 1397.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1398.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1399 add v0.2d,v3.2d,v4.2d // "D + T1" 1400.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1401 add v25.2d,v25.2d,v23.2d 1402 ld1 {v24.2d},[x3],#16 1403 ext v25.16b,v25.16b,v25.16b,#8 1404 ext v5.16b,v0.16b,v1.16b,#8 1405 ext v6.16b,v2.16b,v0.16b,#8 1406 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1407.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1408 ext v7.16b,v19.16b,v20.16b,#8 1409.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1410.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1411 add v3.2d,v2.2d,v1.2d // "D + T1" 1412.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1413 add v24.2d,v24.2d,v16.2d 1414 ld1 {v25.2d},[x3],#16 1415 ext v24.16b,v24.16b,v24.16b,#8 1416 ext v5.16b,v3.16b,v0.16b,#8 1417 ext v6.16b,v4.16b,v3.16b,#8 1418 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1419.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1420 ext v7.16b,v20.16b,v21.16b,#8 1421.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1422.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1423 add v2.2d,v4.2d,v0.2d // "D + T1" 1424.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1425 add v25.2d,v25.2d,v17.2d 1426 ld1 {v24.2d},[x3],#16 1427 ext v25.16b,v25.16b,v25.16b,#8 1428 ext v5.16b,v2.16b,v3.16b,#8 1429 ext v6.16b,v1.16b,v2.16b,#8 
1430 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1431.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1432 ext v7.16b,v21.16b,v22.16b,#8 1433.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1434.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1435 add v4.2d,v1.2d,v3.2d // "D + T1" 1436.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1437 add v24.2d,v24.2d,v18.2d 1438 ld1 {v25.2d},[x3],#16 1439 ext v24.16b,v24.16b,v24.16b,#8 1440 ext v5.16b,v4.16b,v2.16b,#8 1441 ext v6.16b,v0.16b,v4.16b,#8 1442 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1443.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1444 ext v7.16b,v22.16b,v23.16b,#8 1445.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1446.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1447 add v1.2d,v0.2d,v2.2d // "D + T1" 1448.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1449 add v25.2d,v25.2d,v19.2d 1450 ld1 {v24.2d},[x3],#16 1451 ext v25.16b,v25.16b,v25.16b,#8 1452 ext v5.16b,v1.16b,v4.16b,#8 1453 ext v6.16b,v3.16b,v1.16b,#8 1454 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1455.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1456 ext v7.16b,v23.16b,v16.16b,#8 1457.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1458.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1459 add v0.2d,v3.2d,v4.2d // "D + T1" 1460.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1461 add v24.2d,v24.2d,v20.2d 1462 ld1 {v25.2d},[x3],#16 1463 ext v24.16b,v24.16b,v24.16b,#8 1464 ext v5.16b,v0.16b,v1.16b,#8 1465 ext v6.16b,v2.16b,v0.16b,#8 1466 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1467.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1468 ext v7.16b,v16.16b,v17.16b,#8 1469.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1470.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1471 add v3.2d,v2.2d,v1.2d // "D + T1" 1472.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1473 add v25.2d,v25.2d,v21.2d 1474 ld1 {v24.2d},[x3],#16 1475 ext v25.16b,v25.16b,v25.16b,#8 1476 ext v5.16b,v3.16b,v0.16b,#8 1477 ext v6.16b,v4.16b,v3.16b,#8 1478 add v0.2d,v0.2d,v25.2d 
// "T1 + H + K512[i]" 1479.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1480 ext v7.16b,v17.16b,v18.16b,#8 1481.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1482.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1483 add v2.2d,v4.2d,v0.2d // "D + T1" 1484.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1485 add v24.2d,v24.2d,v22.2d 1486 ld1 {v25.2d},[x3],#16 1487 ext v24.16b,v24.16b,v24.16b,#8 1488 ext v5.16b,v2.16b,v3.16b,#8 1489 ext v6.16b,v1.16b,v2.16b,#8 1490 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1491.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1492 ext v7.16b,v18.16b,v19.16b,#8 1493.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1494.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1495 add v4.2d,v1.2d,v3.2d // "D + T1" 1496.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1497 add v25.2d,v25.2d,v23.2d 1498 ld1 {v24.2d},[x3],#16 1499 ext v25.16b,v25.16b,v25.16b,#8 1500 ext v5.16b,v4.16b,v2.16b,#8 1501 ext v6.16b,v0.16b,v4.16b,#8 1502 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1503.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1504 ext v7.16b,v19.16b,v20.16b,#8 1505.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1506.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1507 add v1.2d,v0.2d,v2.2d // "D + T1" 1508.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1509 ld1 {v25.2d},[x3],#16 1510 add v24.2d,v24.2d,v16.2d 1511 ld1 {v16.16b},[x1],#16 // load next input 1512 ext v24.16b,v24.16b,v24.16b,#8 1513 ext v5.16b,v1.16b,v4.16b,#8 1514 ext v6.16b,v3.16b,v1.16b,#8 1515 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1516.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1517 rev64 v16.16b,v16.16b 1518 add v0.2d,v3.2d,v4.2d // "D + T1" 1519.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1520 ld1 {v24.2d},[x3],#16 1521 add v25.2d,v25.2d,v17.2d 1522 ld1 {v17.16b},[x1],#16 // load next input 1523 ext v25.16b,v25.16b,v25.16b,#8 1524 ext v5.16b,v0.16b,v1.16b,#8 1525 ext v6.16b,v2.16b,v0.16b,#8 1526 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1527.inst 0xce6680a1 
//sha512h v1.16b,v5.16b,v6.16b 1528 rev64 v17.16b,v17.16b 1529 add v3.2d,v2.2d,v1.2d // "D + T1" 1530.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1531 ld1 {v25.2d},[x3],#16 1532 add v24.2d,v24.2d,v18.2d 1533 ld1 {v18.16b},[x1],#16 // load next input 1534 ext v24.16b,v24.16b,v24.16b,#8 1535 ext v5.16b,v3.16b,v0.16b,#8 1536 ext v6.16b,v4.16b,v3.16b,#8 1537 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1538.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1539 rev64 v18.16b,v18.16b 1540 add v2.2d,v4.2d,v0.2d // "D + T1" 1541.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1542 ld1 {v24.2d},[x3],#16 1543 add v25.2d,v25.2d,v19.2d 1544 ld1 {v19.16b},[x1],#16 // load next input 1545 ext v25.16b,v25.16b,v25.16b,#8 1546 ext v5.16b,v2.16b,v3.16b,#8 1547 ext v6.16b,v1.16b,v2.16b,#8 1548 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1549.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1550 rev64 v19.16b,v19.16b 1551 add v4.2d,v1.2d,v3.2d // "D + T1" 1552.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1553 ld1 {v25.2d},[x3],#16 1554 add v24.2d,v24.2d,v20.2d 1555 ld1 {v20.16b},[x1],#16 // load next input 1556 ext v24.16b,v24.16b,v24.16b,#8 1557 ext v5.16b,v4.16b,v2.16b,#8 1558 ext v6.16b,v0.16b,v4.16b,#8 1559 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1560.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1561 rev64 v20.16b,v20.16b 1562 add v1.2d,v0.2d,v2.2d // "D + T1" 1563.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1564 ld1 {v24.2d},[x3],#16 1565 add v25.2d,v25.2d,v21.2d 1566 ld1 {v21.16b},[x1],#16 // load next input 1567 ext v25.16b,v25.16b,v25.16b,#8 1568 ext v5.16b,v1.16b,v4.16b,#8 1569 ext v6.16b,v3.16b,v1.16b,#8 1570 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1571.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1572 rev64 v21.16b,v21.16b 1573 add v0.2d,v3.2d,v4.2d // "D + T1" 1574.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1575 ld1 {v25.2d},[x3],#16 1576 add v24.2d,v24.2d,v22.2d 1577 ld1 {v22.16b},[x1],#16 // load next input 1578 ext v24.16b,v24.16b,v24.16b,#8 
1579 ext v5.16b,v0.16b,v1.16b,#8 1580 ext v6.16b,v2.16b,v0.16b,#8 1581 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1582.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1583 rev64 v22.16b,v22.16b 1584 add v3.2d,v2.2d,v1.2d // "D + T1" 1585.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1586 sub x3,x3,#80*8 // rewind 1587 add v25.2d,v25.2d,v23.2d 1588 ld1 {v23.16b},[x1],#16 // load next input 1589 ext v25.16b,v25.16b,v25.16b,#8 1590 ext v5.16b,v3.16b,v0.16b,#8 1591 ext v6.16b,v4.16b,v3.16b,#8 1592 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1593.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1594 rev64 v23.16b,v23.16b 1595 add v2.2d,v4.2d,v0.2d // "D + T1" 1596.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1597 add v0.2d,v0.2d,v26.2d // accumulate 1598 add v1.2d,v1.2d,v27.2d 1599 add v2.2d,v2.2d,v28.2d 1600 add v3.2d,v3.2d,v29.2d 1601 1602 cbnz x2,.Loop_hw 1603 1604 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1605 1606 ldr x29,[sp],#16 1607 ret 1608.size sha512_block_armv8,.-sha512_block_armv8 1609#endif 1610