/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//              SHA256-hw   SHA256(*)       SHA512
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
//
// (*)   Software SHA256 results are of lesser relevance, presented
//       mostly for informational purposes.
// (**)  The result is a trade-off: it's possible to improve it by
//       10% (or by 1 cycle per round), but at the cost of 20% loss
//       on Cortex-A53 (or by 4 cycles per round).
// (***) Super-impressive coefficients over gcc-generated code are
//       indication of some compiler "pathology", most notably code
//       generated with -mgeneral-regs-only is significantly faster
//       and the gap is only 40-90%.
//
// October 2016.
43// 44// Originally it was reckoned that it makes no sense to implement NEON 45// version of SHA256 for 64-bit processors. This is because performance 46// improvement on most wide-spread Cortex-A5x processors was observed 47// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was 48// observed that 32-bit NEON SHA256 performs significantly better than 49// 64-bit scalar version on *some* of the more recent processors. As 50// result 64-bit NEON version of SHA256 was added to provide best 51// all-round performance. For example it executes ~30% faster on X-Gene 52// and Mongoose. [For reference, NEON version of SHA512 is bound to 53// deliver much less improvement, likely *negative* on Cortex-A5x. 54// Which is why NEON support is limited to SHA256.] 55 56#ifndef __KERNEL__ 57# include "arm_arch.h" 58#endif 59 60.text 61 62 63.hidden OPENSSL_armcap_P 64.globl sha512_block_data_order 65.type sha512_block_data_order,%function 66.align 6 67sha512_block_data_order: 68#ifndef __KERNEL__ 69# ifdef __ILP32__ 70 ldrsw x16,.LOPENSSL_armcap_P 71# else 72 ldr x16,.LOPENSSL_armcap_P 73# endif 74 adr x17,.LOPENSSL_armcap_P 75 add x16,x16,x17 76 ldr w16,[x16] 77 tst w16,#ARMV8_SHA512 78 b.ne .Lv8_entry 79#endif 80.inst 0xd503233f // paciasp 81 stp x29,x30,[sp,#-128]! 
82 add x29,sp,#0 83 84 stp x19,x20,[sp,#16] 85 stp x21,x22,[sp,#32] 86 stp x23,x24,[sp,#48] 87 stp x25,x26,[sp,#64] 88 stp x27,x28,[sp,#80] 89 sub sp,sp,#4*8 90 91 ldp x20,x21,[x0] // load context 92 ldp x22,x23,[x0,#2*8] 93 ldp x24,x25,[x0,#4*8] 94 add x2,x1,x2,lsl#7 // end of input 95 ldp x26,x27,[x0,#6*8] 96 adr x30,.LK512 97 stp x0,x2,[x29,#96] 98 99.Loop: 100 ldp x3,x4,[x1],#2*8 101 ldr x19,[x30],#8 // *K++ 102 eor x28,x21,x22 // magic seed 103 str x1,[x29,#112] 104#ifndef __AARCH64EB__ 105 rev x3,x3 // 0 106#endif 107 ror x16,x24,#14 108 add x27,x27,x19 // h+=K[i] 109 eor x6,x24,x24,ror#23 110 and x17,x25,x24 111 bic x19,x26,x24 112 add x27,x27,x3 // h+=X[i] 113 orr x17,x17,x19 // Ch(e,f,g) 114 eor x19,x20,x21 // a^b, b^c in next round 115 eor x16,x16,x6,ror#18 // Sigma1(e) 116 ror x6,x20,#28 117 add x27,x27,x17 // h+=Ch(e,f,g) 118 eor x17,x20,x20,ror#5 119 add x27,x27,x16 // h+=Sigma1(e) 120 and x28,x28,x19 // (b^c)&=(a^b) 121 add x23,x23,x27 // d+=h 122 eor x28,x28,x21 // Maj(a,b,c) 123 eor x17,x6,x17,ror#34 // Sigma0(a) 124 add x27,x27,x28 // h+=Maj(a,b,c) 125 ldr x28,[x30],#8 // *K++, x19 in next round 126 //add x27,x27,x17 // h+=Sigma0(a) 127#ifndef __AARCH64EB__ 128 rev x4,x4 // 1 129#endif 130 ldp x5,x6,[x1],#2*8 131 add x27,x27,x17 // h+=Sigma0(a) 132 ror x16,x23,#14 133 add x26,x26,x28 // h+=K[i] 134 eor x7,x23,x23,ror#23 135 and x17,x24,x23 136 bic x28,x25,x23 137 add x26,x26,x4 // h+=X[i] 138 orr x17,x17,x28 // Ch(e,f,g) 139 eor x28,x27,x20 // a^b, b^c in next round 140 eor x16,x16,x7,ror#18 // Sigma1(e) 141 ror x7,x27,#28 142 add x26,x26,x17 // h+=Ch(e,f,g) 143 eor x17,x27,x27,ror#5 144 add x26,x26,x16 // h+=Sigma1(e) 145 and x19,x19,x28 // (b^c)&=(a^b) 146 add x22,x22,x26 // d+=h 147 eor x19,x19,x20 // Maj(a,b,c) 148 eor x17,x7,x17,ror#34 // Sigma0(a) 149 add x26,x26,x19 // h+=Maj(a,b,c) 150 ldr x19,[x30],#8 // *K++, x28 in next round 151 //add x26,x26,x17 // h+=Sigma0(a) 152#ifndef __AARCH64EB__ 153 rev x5,x5 // 2 154#endif 155 add x26,x26,x17 
// h+=Sigma0(a) 156 ror x16,x22,#14 157 add x25,x25,x19 // h+=K[i] 158 eor x8,x22,x22,ror#23 159 and x17,x23,x22 160 bic x19,x24,x22 161 add x25,x25,x5 // h+=X[i] 162 orr x17,x17,x19 // Ch(e,f,g) 163 eor x19,x26,x27 // a^b, b^c in next round 164 eor x16,x16,x8,ror#18 // Sigma1(e) 165 ror x8,x26,#28 166 add x25,x25,x17 // h+=Ch(e,f,g) 167 eor x17,x26,x26,ror#5 168 add x25,x25,x16 // h+=Sigma1(e) 169 and x28,x28,x19 // (b^c)&=(a^b) 170 add x21,x21,x25 // d+=h 171 eor x28,x28,x27 // Maj(a,b,c) 172 eor x17,x8,x17,ror#34 // Sigma0(a) 173 add x25,x25,x28 // h+=Maj(a,b,c) 174 ldr x28,[x30],#8 // *K++, x19 in next round 175 //add x25,x25,x17 // h+=Sigma0(a) 176#ifndef __AARCH64EB__ 177 rev x6,x6 // 3 178#endif 179 ldp x7,x8,[x1],#2*8 180 add x25,x25,x17 // h+=Sigma0(a) 181 ror x16,x21,#14 182 add x24,x24,x28 // h+=K[i] 183 eor x9,x21,x21,ror#23 184 and x17,x22,x21 185 bic x28,x23,x21 186 add x24,x24,x6 // h+=X[i] 187 orr x17,x17,x28 // Ch(e,f,g) 188 eor x28,x25,x26 // a^b, b^c in next round 189 eor x16,x16,x9,ror#18 // Sigma1(e) 190 ror x9,x25,#28 191 add x24,x24,x17 // h+=Ch(e,f,g) 192 eor x17,x25,x25,ror#5 193 add x24,x24,x16 // h+=Sigma1(e) 194 and x19,x19,x28 // (b^c)&=(a^b) 195 add x20,x20,x24 // d+=h 196 eor x19,x19,x26 // Maj(a,b,c) 197 eor x17,x9,x17,ror#34 // Sigma0(a) 198 add x24,x24,x19 // h+=Maj(a,b,c) 199 ldr x19,[x30],#8 // *K++, x28 in next round 200 //add x24,x24,x17 // h+=Sigma0(a) 201#ifndef __AARCH64EB__ 202 rev x7,x7 // 4 203#endif 204 add x24,x24,x17 // h+=Sigma0(a) 205 ror x16,x20,#14 206 add x23,x23,x19 // h+=K[i] 207 eor x10,x20,x20,ror#23 208 and x17,x21,x20 209 bic x19,x22,x20 210 add x23,x23,x7 // h+=X[i] 211 orr x17,x17,x19 // Ch(e,f,g) 212 eor x19,x24,x25 // a^b, b^c in next round 213 eor x16,x16,x10,ror#18 // Sigma1(e) 214 ror x10,x24,#28 215 add x23,x23,x17 // h+=Ch(e,f,g) 216 eor x17,x24,x24,ror#5 217 add x23,x23,x16 // h+=Sigma1(e) 218 and x28,x28,x19 // (b^c)&=(a^b) 219 add x27,x27,x23 // d+=h 220 eor x28,x28,x25 // Maj(a,b,c) 221 eor 
x17,x10,x17,ror#34 // Sigma0(a) 222 add x23,x23,x28 // h+=Maj(a,b,c) 223 ldr x28,[x30],#8 // *K++, x19 in next round 224 //add x23,x23,x17 // h+=Sigma0(a) 225#ifndef __AARCH64EB__ 226 rev x8,x8 // 5 227#endif 228 ldp x9,x10,[x1],#2*8 229 add x23,x23,x17 // h+=Sigma0(a) 230 ror x16,x27,#14 231 add x22,x22,x28 // h+=K[i] 232 eor x11,x27,x27,ror#23 233 and x17,x20,x27 234 bic x28,x21,x27 235 add x22,x22,x8 // h+=X[i] 236 orr x17,x17,x28 // Ch(e,f,g) 237 eor x28,x23,x24 // a^b, b^c in next round 238 eor x16,x16,x11,ror#18 // Sigma1(e) 239 ror x11,x23,#28 240 add x22,x22,x17 // h+=Ch(e,f,g) 241 eor x17,x23,x23,ror#5 242 add x22,x22,x16 // h+=Sigma1(e) 243 and x19,x19,x28 // (b^c)&=(a^b) 244 add x26,x26,x22 // d+=h 245 eor x19,x19,x24 // Maj(a,b,c) 246 eor x17,x11,x17,ror#34 // Sigma0(a) 247 add x22,x22,x19 // h+=Maj(a,b,c) 248 ldr x19,[x30],#8 // *K++, x28 in next round 249 //add x22,x22,x17 // h+=Sigma0(a) 250#ifndef __AARCH64EB__ 251 rev x9,x9 // 6 252#endif 253 add x22,x22,x17 // h+=Sigma0(a) 254 ror x16,x26,#14 255 add x21,x21,x19 // h+=K[i] 256 eor x12,x26,x26,ror#23 257 and x17,x27,x26 258 bic x19,x20,x26 259 add x21,x21,x9 // h+=X[i] 260 orr x17,x17,x19 // Ch(e,f,g) 261 eor x19,x22,x23 // a^b, b^c in next round 262 eor x16,x16,x12,ror#18 // Sigma1(e) 263 ror x12,x22,#28 264 add x21,x21,x17 // h+=Ch(e,f,g) 265 eor x17,x22,x22,ror#5 266 add x21,x21,x16 // h+=Sigma1(e) 267 and x28,x28,x19 // (b^c)&=(a^b) 268 add x25,x25,x21 // d+=h 269 eor x28,x28,x23 // Maj(a,b,c) 270 eor x17,x12,x17,ror#34 // Sigma0(a) 271 add x21,x21,x28 // h+=Maj(a,b,c) 272 ldr x28,[x30],#8 // *K++, x19 in next round 273 //add x21,x21,x17 // h+=Sigma0(a) 274#ifndef __AARCH64EB__ 275 rev x10,x10 // 7 276#endif 277 ldp x11,x12,[x1],#2*8 278 add x21,x21,x17 // h+=Sigma0(a) 279 ror x16,x25,#14 280 add x20,x20,x28 // h+=K[i] 281 eor x13,x25,x25,ror#23 282 and x17,x26,x25 283 bic x28,x27,x25 284 add x20,x20,x10 // h+=X[i] 285 orr x17,x17,x28 // Ch(e,f,g) 286 eor x28,x21,x22 // a^b, b^c in next round 
287 eor x16,x16,x13,ror#18 // Sigma1(e) 288 ror x13,x21,#28 289 add x20,x20,x17 // h+=Ch(e,f,g) 290 eor x17,x21,x21,ror#5 291 add x20,x20,x16 // h+=Sigma1(e) 292 and x19,x19,x28 // (b^c)&=(a^b) 293 add x24,x24,x20 // d+=h 294 eor x19,x19,x22 // Maj(a,b,c) 295 eor x17,x13,x17,ror#34 // Sigma0(a) 296 add x20,x20,x19 // h+=Maj(a,b,c) 297 ldr x19,[x30],#8 // *K++, x28 in next round 298 //add x20,x20,x17 // h+=Sigma0(a) 299#ifndef __AARCH64EB__ 300 rev x11,x11 // 8 301#endif 302 add x20,x20,x17 // h+=Sigma0(a) 303 ror x16,x24,#14 304 add x27,x27,x19 // h+=K[i] 305 eor x14,x24,x24,ror#23 306 and x17,x25,x24 307 bic x19,x26,x24 308 add x27,x27,x11 // h+=X[i] 309 orr x17,x17,x19 // Ch(e,f,g) 310 eor x19,x20,x21 // a^b, b^c in next round 311 eor x16,x16,x14,ror#18 // Sigma1(e) 312 ror x14,x20,#28 313 add x27,x27,x17 // h+=Ch(e,f,g) 314 eor x17,x20,x20,ror#5 315 add x27,x27,x16 // h+=Sigma1(e) 316 and x28,x28,x19 // (b^c)&=(a^b) 317 add x23,x23,x27 // d+=h 318 eor x28,x28,x21 // Maj(a,b,c) 319 eor x17,x14,x17,ror#34 // Sigma0(a) 320 add x27,x27,x28 // h+=Maj(a,b,c) 321 ldr x28,[x30],#8 // *K++, x19 in next round 322 //add x27,x27,x17 // h+=Sigma0(a) 323#ifndef __AARCH64EB__ 324 rev x12,x12 // 9 325#endif 326 ldp x13,x14,[x1],#2*8 327 add x27,x27,x17 // h+=Sigma0(a) 328 ror x16,x23,#14 329 add x26,x26,x28 // h+=K[i] 330 eor x15,x23,x23,ror#23 331 and x17,x24,x23 332 bic x28,x25,x23 333 add x26,x26,x12 // h+=X[i] 334 orr x17,x17,x28 // Ch(e,f,g) 335 eor x28,x27,x20 // a^b, b^c in next round 336 eor x16,x16,x15,ror#18 // Sigma1(e) 337 ror x15,x27,#28 338 add x26,x26,x17 // h+=Ch(e,f,g) 339 eor x17,x27,x27,ror#5 340 add x26,x26,x16 // h+=Sigma1(e) 341 and x19,x19,x28 // (b^c)&=(a^b) 342 add x22,x22,x26 // d+=h 343 eor x19,x19,x20 // Maj(a,b,c) 344 eor x17,x15,x17,ror#34 // Sigma0(a) 345 add x26,x26,x19 // h+=Maj(a,b,c) 346 ldr x19,[x30],#8 // *K++, x28 in next round 347 //add x26,x26,x17 // h+=Sigma0(a) 348#ifndef __AARCH64EB__ 349 rev x13,x13 // 10 350#endif 351 add x26,x26,x17 
// h+=Sigma0(a) 352 ror x16,x22,#14 353 add x25,x25,x19 // h+=K[i] 354 eor x0,x22,x22,ror#23 355 and x17,x23,x22 356 bic x19,x24,x22 357 add x25,x25,x13 // h+=X[i] 358 orr x17,x17,x19 // Ch(e,f,g) 359 eor x19,x26,x27 // a^b, b^c in next round 360 eor x16,x16,x0,ror#18 // Sigma1(e) 361 ror x0,x26,#28 362 add x25,x25,x17 // h+=Ch(e,f,g) 363 eor x17,x26,x26,ror#5 364 add x25,x25,x16 // h+=Sigma1(e) 365 and x28,x28,x19 // (b^c)&=(a^b) 366 add x21,x21,x25 // d+=h 367 eor x28,x28,x27 // Maj(a,b,c) 368 eor x17,x0,x17,ror#34 // Sigma0(a) 369 add x25,x25,x28 // h+=Maj(a,b,c) 370 ldr x28,[x30],#8 // *K++, x19 in next round 371 //add x25,x25,x17 // h+=Sigma0(a) 372#ifndef __AARCH64EB__ 373 rev x14,x14 // 11 374#endif 375 ldp x15,x0,[x1],#2*8 376 add x25,x25,x17 // h+=Sigma0(a) 377 str x6,[sp,#24] 378 ror x16,x21,#14 379 add x24,x24,x28 // h+=K[i] 380 eor x6,x21,x21,ror#23 381 and x17,x22,x21 382 bic x28,x23,x21 383 add x24,x24,x14 // h+=X[i] 384 orr x17,x17,x28 // Ch(e,f,g) 385 eor x28,x25,x26 // a^b, b^c in next round 386 eor x16,x16,x6,ror#18 // Sigma1(e) 387 ror x6,x25,#28 388 add x24,x24,x17 // h+=Ch(e,f,g) 389 eor x17,x25,x25,ror#5 390 add x24,x24,x16 // h+=Sigma1(e) 391 and x19,x19,x28 // (b^c)&=(a^b) 392 add x20,x20,x24 // d+=h 393 eor x19,x19,x26 // Maj(a,b,c) 394 eor x17,x6,x17,ror#34 // Sigma0(a) 395 add x24,x24,x19 // h+=Maj(a,b,c) 396 ldr x19,[x30],#8 // *K++, x28 in next round 397 //add x24,x24,x17 // h+=Sigma0(a) 398#ifndef __AARCH64EB__ 399 rev x15,x15 // 12 400#endif 401 add x24,x24,x17 // h+=Sigma0(a) 402 str x7,[sp,#0] 403 ror x16,x20,#14 404 add x23,x23,x19 // h+=K[i] 405 eor x7,x20,x20,ror#23 406 and x17,x21,x20 407 bic x19,x22,x20 408 add x23,x23,x15 // h+=X[i] 409 orr x17,x17,x19 // Ch(e,f,g) 410 eor x19,x24,x25 // a^b, b^c in next round 411 eor x16,x16,x7,ror#18 // Sigma1(e) 412 ror x7,x24,#28 413 add x23,x23,x17 // h+=Ch(e,f,g) 414 eor x17,x24,x24,ror#5 415 add x23,x23,x16 // h+=Sigma1(e) 416 and x28,x28,x19 // (b^c)&=(a^b) 417 add x27,x27,x23 // d+=h 
418 eor x28,x28,x25 // Maj(a,b,c) 419 eor x17,x7,x17,ror#34 // Sigma0(a) 420 add x23,x23,x28 // h+=Maj(a,b,c) 421 ldr x28,[x30],#8 // *K++, x19 in next round 422 //add x23,x23,x17 // h+=Sigma0(a) 423#ifndef __AARCH64EB__ 424 rev x0,x0 // 13 425#endif 426 ldp x1,x2,[x1] 427 add x23,x23,x17 // h+=Sigma0(a) 428 str x8,[sp,#8] 429 ror x16,x27,#14 430 add x22,x22,x28 // h+=K[i] 431 eor x8,x27,x27,ror#23 432 and x17,x20,x27 433 bic x28,x21,x27 434 add x22,x22,x0 // h+=X[i] 435 orr x17,x17,x28 // Ch(e,f,g) 436 eor x28,x23,x24 // a^b, b^c in next round 437 eor x16,x16,x8,ror#18 // Sigma1(e) 438 ror x8,x23,#28 439 add x22,x22,x17 // h+=Ch(e,f,g) 440 eor x17,x23,x23,ror#5 441 add x22,x22,x16 // h+=Sigma1(e) 442 and x19,x19,x28 // (b^c)&=(a^b) 443 add x26,x26,x22 // d+=h 444 eor x19,x19,x24 // Maj(a,b,c) 445 eor x17,x8,x17,ror#34 // Sigma0(a) 446 add x22,x22,x19 // h+=Maj(a,b,c) 447 ldr x19,[x30],#8 // *K++, x28 in next round 448 //add x22,x22,x17 // h+=Sigma0(a) 449#ifndef __AARCH64EB__ 450 rev x1,x1 // 14 451#endif 452 ldr x6,[sp,#24] 453 add x22,x22,x17 // h+=Sigma0(a) 454 str x9,[sp,#16] 455 ror x16,x26,#14 456 add x21,x21,x19 // h+=K[i] 457 eor x9,x26,x26,ror#23 458 and x17,x27,x26 459 bic x19,x20,x26 460 add x21,x21,x1 // h+=X[i] 461 orr x17,x17,x19 // Ch(e,f,g) 462 eor x19,x22,x23 // a^b, b^c in next round 463 eor x16,x16,x9,ror#18 // Sigma1(e) 464 ror x9,x22,#28 465 add x21,x21,x17 // h+=Ch(e,f,g) 466 eor x17,x22,x22,ror#5 467 add x21,x21,x16 // h+=Sigma1(e) 468 and x28,x28,x19 // (b^c)&=(a^b) 469 add x25,x25,x21 // d+=h 470 eor x28,x28,x23 // Maj(a,b,c) 471 eor x17,x9,x17,ror#34 // Sigma0(a) 472 add x21,x21,x28 // h+=Maj(a,b,c) 473 ldr x28,[x30],#8 // *K++, x19 in next round 474 //add x21,x21,x17 // h+=Sigma0(a) 475#ifndef __AARCH64EB__ 476 rev x2,x2 // 15 477#endif 478 ldr x7,[sp,#0] 479 add x21,x21,x17 // h+=Sigma0(a) 480 str x10,[sp,#24] 481 ror x16,x25,#14 482 add x20,x20,x28 // h+=K[i] 483 ror x9,x4,#1 484 and x17,x26,x25 485 ror x8,x1,#19 486 bic x28,x27,x25 
487 ror x10,x21,#28 488 add x20,x20,x2 // h+=X[i] 489 eor x16,x16,x25,ror#18 490 eor x9,x9,x4,ror#8 491 orr x17,x17,x28 // Ch(e,f,g) 492 eor x28,x21,x22 // a^b, b^c in next round 493 eor x16,x16,x25,ror#41 // Sigma1(e) 494 eor x10,x10,x21,ror#34 495 add x20,x20,x17 // h+=Ch(e,f,g) 496 and x19,x19,x28 // (b^c)&=(a^b) 497 eor x8,x8,x1,ror#61 498 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 499 add x20,x20,x16 // h+=Sigma1(e) 500 eor x19,x19,x22 // Maj(a,b,c) 501 eor x17,x10,x21,ror#39 // Sigma0(a) 502 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 503 add x3,x3,x12 504 add x24,x24,x20 // d+=h 505 add x20,x20,x19 // h+=Maj(a,b,c) 506 ldr x19,[x30],#8 // *K++, x28 in next round 507 add x3,x3,x9 508 add x20,x20,x17 // h+=Sigma0(a) 509 add x3,x3,x8 510.Loop_16_xx: 511 ldr x8,[sp,#8] 512 str x11,[sp,#0] 513 ror x16,x24,#14 514 add x27,x27,x19 // h+=K[i] 515 ror x10,x5,#1 516 and x17,x25,x24 517 ror x9,x2,#19 518 bic x19,x26,x24 519 ror x11,x20,#28 520 add x27,x27,x3 // h+=X[i] 521 eor x16,x16,x24,ror#18 522 eor x10,x10,x5,ror#8 523 orr x17,x17,x19 // Ch(e,f,g) 524 eor x19,x20,x21 // a^b, b^c in next round 525 eor x16,x16,x24,ror#41 // Sigma1(e) 526 eor x11,x11,x20,ror#34 527 add x27,x27,x17 // h+=Ch(e,f,g) 528 and x28,x28,x19 // (b^c)&=(a^b) 529 eor x9,x9,x2,ror#61 530 eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) 531 add x27,x27,x16 // h+=Sigma1(e) 532 eor x28,x28,x21 // Maj(a,b,c) 533 eor x17,x11,x20,ror#39 // Sigma0(a) 534 eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) 535 add x4,x4,x13 536 add x23,x23,x27 // d+=h 537 add x27,x27,x28 // h+=Maj(a,b,c) 538 ldr x28,[x30],#8 // *K++, x19 in next round 539 add x4,x4,x10 540 add x27,x27,x17 // h+=Sigma0(a) 541 add x4,x4,x9 542 ldr x9,[sp,#16] 543 str x12,[sp,#8] 544 ror x16,x23,#14 545 add x26,x26,x28 // h+=K[i] 546 ror x11,x6,#1 547 and x17,x24,x23 548 ror x10,x3,#19 549 bic x28,x25,x23 550 ror x12,x27,#28 551 add x26,x26,x4 // h+=X[i] 552 eor x16,x16,x23,ror#18 553 eor x11,x11,x6,ror#8 554 orr x17,x17,x28 // Ch(e,f,g) 555 eor x28,x27,x20 // a^b, b^c in 
next round 556 eor x16,x16,x23,ror#41 // Sigma1(e) 557 eor x12,x12,x27,ror#34 558 add x26,x26,x17 // h+=Ch(e,f,g) 559 and x19,x19,x28 // (b^c)&=(a^b) 560 eor x10,x10,x3,ror#61 561 eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) 562 add x26,x26,x16 // h+=Sigma1(e) 563 eor x19,x19,x20 // Maj(a,b,c) 564 eor x17,x12,x27,ror#39 // Sigma0(a) 565 eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) 566 add x5,x5,x14 567 add x22,x22,x26 // d+=h 568 add x26,x26,x19 // h+=Maj(a,b,c) 569 ldr x19,[x30],#8 // *K++, x28 in next round 570 add x5,x5,x11 571 add x26,x26,x17 // h+=Sigma0(a) 572 add x5,x5,x10 573 ldr x10,[sp,#24] 574 str x13,[sp,#16] 575 ror x16,x22,#14 576 add x25,x25,x19 // h+=K[i] 577 ror x12,x7,#1 578 and x17,x23,x22 579 ror x11,x4,#19 580 bic x19,x24,x22 581 ror x13,x26,#28 582 add x25,x25,x5 // h+=X[i] 583 eor x16,x16,x22,ror#18 584 eor x12,x12,x7,ror#8 585 orr x17,x17,x19 // Ch(e,f,g) 586 eor x19,x26,x27 // a^b, b^c in next round 587 eor x16,x16,x22,ror#41 // Sigma1(e) 588 eor x13,x13,x26,ror#34 589 add x25,x25,x17 // h+=Ch(e,f,g) 590 and x28,x28,x19 // (b^c)&=(a^b) 591 eor x11,x11,x4,ror#61 592 eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) 593 add x25,x25,x16 // h+=Sigma1(e) 594 eor x28,x28,x27 // Maj(a,b,c) 595 eor x17,x13,x26,ror#39 // Sigma0(a) 596 eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) 597 add x6,x6,x15 598 add x21,x21,x25 // d+=h 599 add x25,x25,x28 // h+=Maj(a,b,c) 600 ldr x28,[x30],#8 // *K++, x19 in next round 601 add x6,x6,x12 602 add x25,x25,x17 // h+=Sigma0(a) 603 add x6,x6,x11 604 ldr x11,[sp,#0] 605 str x14,[sp,#24] 606 ror x16,x21,#14 607 add x24,x24,x28 // h+=K[i] 608 ror x13,x8,#1 609 and x17,x22,x21 610 ror x12,x5,#19 611 bic x28,x23,x21 612 ror x14,x25,#28 613 add x24,x24,x6 // h+=X[i] 614 eor x16,x16,x21,ror#18 615 eor x13,x13,x8,ror#8 616 orr x17,x17,x28 // Ch(e,f,g) 617 eor x28,x25,x26 // a^b, b^c in next round 618 eor x16,x16,x21,ror#41 // Sigma1(e) 619 eor x14,x14,x25,ror#34 620 add x24,x24,x17 // h+=Ch(e,f,g) 621 and x19,x19,x28 // (b^c)&=(a^b) 622 eor 
x12,x12,x5,ror#61 623 eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) 624 add x24,x24,x16 // h+=Sigma1(e) 625 eor x19,x19,x26 // Maj(a,b,c) 626 eor x17,x14,x25,ror#39 // Sigma0(a) 627 eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) 628 add x7,x7,x0 629 add x20,x20,x24 // d+=h 630 add x24,x24,x19 // h+=Maj(a,b,c) 631 ldr x19,[x30],#8 // *K++, x28 in next round 632 add x7,x7,x13 633 add x24,x24,x17 // h+=Sigma0(a) 634 add x7,x7,x12 635 ldr x12,[sp,#8] 636 str x15,[sp,#0] 637 ror x16,x20,#14 638 add x23,x23,x19 // h+=K[i] 639 ror x14,x9,#1 640 and x17,x21,x20 641 ror x13,x6,#19 642 bic x19,x22,x20 643 ror x15,x24,#28 644 add x23,x23,x7 // h+=X[i] 645 eor x16,x16,x20,ror#18 646 eor x14,x14,x9,ror#8 647 orr x17,x17,x19 // Ch(e,f,g) 648 eor x19,x24,x25 // a^b, b^c in next round 649 eor x16,x16,x20,ror#41 // Sigma1(e) 650 eor x15,x15,x24,ror#34 651 add x23,x23,x17 // h+=Ch(e,f,g) 652 and x28,x28,x19 // (b^c)&=(a^b) 653 eor x13,x13,x6,ror#61 654 eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) 655 add x23,x23,x16 // h+=Sigma1(e) 656 eor x28,x28,x25 // Maj(a,b,c) 657 eor x17,x15,x24,ror#39 // Sigma0(a) 658 eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) 659 add x8,x8,x1 660 add x27,x27,x23 // d+=h 661 add x23,x23,x28 // h+=Maj(a,b,c) 662 ldr x28,[x30],#8 // *K++, x19 in next round 663 add x8,x8,x14 664 add x23,x23,x17 // h+=Sigma0(a) 665 add x8,x8,x13 666 ldr x13,[sp,#16] 667 str x0,[sp,#8] 668 ror x16,x27,#14 669 add x22,x22,x28 // h+=K[i] 670 ror x15,x10,#1 671 and x17,x20,x27 672 ror x14,x7,#19 673 bic x28,x21,x27 674 ror x0,x23,#28 675 add x22,x22,x8 // h+=X[i] 676 eor x16,x16,x27,ror#18 677 eor x15,x15,x10,ror#8 678 orr x17,x17,x28 // Ch(e,f,g) 679 eor x28,x23,x24 // a^b, b^c in next round 680 eor x16,x16,x27,ror#41 // Sigma1(e) 681 eor x0,x0,x23,ror#34 682 add x22,x22,x17 // h+=Ch(e,f,g) 683 and x19,x19,x28 // (b^c)&=(a^b) 684 eor x14,x14,x7,ror#61 685 eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) 686 add x22,x22,x16 // h+=Sigma1(e) 687 eor x19,x19,x24 // Maj(a,b,c) 688 eor x17,x0,x23,ror#39 // Sigma0(a) 689 
eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) 690 add x9,x9,x2 691 add x26,x26,x22 // d+=h 692 add x22,x22,x19 // h+=Maj(a,b,c) 693 ldr x19,[x30],#8 // *K++, x28 in next round 694 add x9,x9,x15 695 add x22,x22,x17 // h+=Sigma0(a) 696 add x9,x9,x14 697 ldr x14,[sp,#24] 698 str x1,[sp,#16] 699 ror x16,x26,#14 700 add x21,x21,x19 // h+=K[i] 701 ror x0,x11,#1 702 and x17,x27,x26 703 ror x15,x8,#19 704 bic x19,x20,x26 705 ror x1,x22,#28 706 add x21,x21,x9 // h+=X[i] 707 eor x16,x16,x26,ror#18 708 eor x0,x0,x11,ror#8 709 orr x17,x17,x19 // Ch(e,f,g) 710 eor x19,x22,x23 // a^b, b^c in next round 711 eor x16,x16,x26,ror#41 // Sigma1(e) 712 eor x1,x1,x22,ror#34 713 add x21,x21,x17 // h+=Ch(e,f,g) 714 and x28,x28,x19 // (b^c)&=(a^b) 715 eor x15,x15,x8,ror#61 716 eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) 717 add x21,x21,x16 // h+=Sigma1(e) 718 eor x28,x28,x23 // Maj(a,b,c) 719 eor x17,x1,x22,ror#39 // Sigma0(a) 720 eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) 721 add x10,x10,x3 722 add x25,x25,x21 // d+=h 723 add x21,x21,x28 // h+=Maj(a,b,c) 724 ldr x28,[x30],#8 // *K++, x19 in next round 725 add x10,x10,x0 726 add x21,x21,x17 // h+=Sigma0(a) 727 add x10,x10,x15 728 ldr x15,[sp,#0] 729 str x2,[sp,#24] 730 ror x16,x25,#14 731 add x20,x20,x28 // h+=K[i] 732 ror x1,x12,#1 733 and x17,x26,x25 734 ror x0,x9,#19 735 bic x28,x27,x25 736 ror x2,x21,#28 737 add x20,x20,x10 // h+=X[i] 738 eor x16,x16,x25,ror#18 739 eor x1,x1,x12,ror#8 740 orr x17,x17,x28 // Ch(e,f,g) 741 eor x28,x21,x22 // a^b, b^c in next round 742 eor x16,x16,x25,ror#41 // Sigma1(e) 743 eor x2,x2,x21,ror#34 744 add x20,x20,x17 // h+=Ch(e,f,g) 745 and x19,x19,x28 // (b^c)&=(a^b) 746 eor x0,x0,x9,ror#61 747 eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) 748 add x20,x20,x16 // h+=Sigma1(e) 749 eor x19,x19,x22 // Maj(a,b,c) 750 eor x17,x2,x21,ror#39 // Sigma0(a) 751 eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) 752 add x11,x11,x4 753 add x24,x24,x20 // d+=h 754 add x20,x20,x19 // h+=Maj(a,b,c) 755 ldr x19,[x30],#8 // *K++, x28 in next round 756 add 
x11,x11,x1 757 add x20,x20,x17 // h+=Sigma0(a) 758 add x11,x11,x0 759 ldr x0,[sp,#8] 760 str x3,[sp,#0] 761 ror x16,x24,#14 762 add x27,x27,x19 // h+=K[i] 763 ror x2,x13,#1 764 and x17,x25,x24 765 ror x1,x10,#19 766 bic x19,x26,x24 767 ror x3,x20,#28 768 add x27,x27,x11 // h+=X[i] 769 eor x16,x16,x24,ror#18 770 eor x2,x2,x13,ror#8 771 orr x17,x17,x19 // Ch(e,f,g) 772 eor x19,x20,x21 // a^b, b^c in next round 773 eor x16,x16,x24,ror#41 // Sigma1(e) 774 eor x3,x3,x20,ror#34 775 add x27,x27,x17 // h+=Ch(e,f,g) 776 and x28,x28,x19 // (b^c)&=(a^b) 777 eor x1,x1,x10,ror#61 778 eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) 779 add x27,x27,x16 // h+=Sigma1(e) 780 eor x28,x28,x21 // Maj(a,b,c) 781 eor x17,x3,x20,ror#39 // Sigma0(a) 782 eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) 783 add x12,x12,x5 784 add x23,x23,x27 // d+=h 785 add x27,x27,x28 // h+=Maj(a,b,c) 786 ldr x28,[x30],#8 // *K++, x19 in next round 787 add x12,x12,x2 788 add x27,x27,x17 // h+=Sigma0(a) 789 add x12,x12,x1 790 ldr x1,[sp,#16] 791 str x4,[sp,#8] 792 ror x16,x23,#14 793 add x26,x26,x28 // h+=K[i] 794 ror x3,x14,#1 795 and x17,x24,x23 796 ror x2,x11,#19 797 bic x28,x25,x23 798 ror x4,x27,#28 799 add x26,x26,x12 // h+=X[i] 800 eor x16,x16,x23,ror#18 801 eor x3,x3,x14,ror#8 802 orr x17,x17,x28 // Ch(e,f,g) 803 eor x28,x27,x20 // a^b, b^c in next round 804 eor x16,x16,x23,ror#41 // Sigma1(e) 805 eor x4,x4,x27,ror#34 806 add x26,x26,x17 // h+=Ch(e,f,g) 807 and x19,x19,x28 // (b^c)&=(a^b) 808 eor x2,x2,x11,ror#61 809 eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) 810 add x26,x26,x16 // h+=Sigma1(e) 811 eor x19,x19,x20 // Maj(a,b,c) 812 eor x17,x4,x27,ror#39 // Sigma0(a) 813 eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) 814 add x13,x13,x6 815 add x22,x22,x26 // d+=h 816 add x26,x26,x19 // h+=Maj(a,b,c) 817 ldr x19,[x30],#8 // *K++, x28 in next round 818 add x13,x13,x3 819 add x26,x26,x17 // h+=Sigma0(a) 820 add x13,x13,x2 821 ldr x2,[sp,#24] 822 str x5,[sp,#16] 823 ror x16,x22,#14 824 add x25,x25,x19 // h+=K[i] 825 ror x4,x15,#1 826 and 
x17,x23,x22 827 ror x3,x12,#19 828 bic x19,x24,x22 829 ror x5,x26,#28 830 add x25,x25,x13 // h+=X[i] 831 eor x16,x16,x22,ror#18 832 eor x4,x4,x15,ror#8 833 orr x17,x17,x19 // Ch(e,f,g) 834 eor x19,x26,x27 // a^b, b^c in next round 835 eor x16,x16,x22,ror#41 // Sigma1(e) 836 eor x5,x5,x26,ror#34 837 add x25,x25,x17 // h+=Ch(e,f,g) 838 and x28,x28,x19 // (b^c)&=(a^b) 839 eor x3,x3,x12,ror#61 840 eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) 841 add x25,x25,x16 // h+=Sigma1(e) 842 eor x28,x28,x27 // Maj(a,b,c) 843 eor x17,x5,x26,ror#39 // Sigma0(a) 844 eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) 845 add x14,x14,x7 846 add x21,x21,x25 // d+=h 847 add x25,x25,x28 // h+=Maj(a,b,c) 848 ldr x28,[x30],#8 // *K++, x19 in next round 849 add x14,x14,x4 850 add x25,x25,x17 // h+=Sigma0(a) 851 add x14,x14,x3 852 ldr x3,[sp,#0] 853 str x6,[sp,#24] 854 ror x16,x21,#14 855 add x24,x24,x28 // h+=K[i] 856 ror x5,x0,#1 857 and x17,x22,x21 858 ror x4,x13,#19 859 bic x28,x23,x21 860 ror x6,x25,#28 861 add x24,x24,x14 // h+=X[i] 862 eor x16,x16,x21,ror#18 863 eor x5,x5,x0,ror#8 864 orr x17,x17,x28 // Ch(e,f,g) 865 eor x28,x25,x26 // a^b, b^c in next round 866 eor x16,x16,x21,ror#41 // Sigma1(e) 867 eor x6,x6,x25,ror#34 868 add x24,x24,x17 // h+=Ch(e,f,g) 869 and x19,x19,x28 // (b^c)&=(a^b) 870 eor x4,x4,x13,ror#61 871 eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) 872 add x24,x24,x16 // h+=Sigma1(e) 873 eor x19,x19,x26 // Maj(a,b,c) 874 eor x17,x6,x25,ror#39 // Sigma0(a) 875 eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) 876 add x15,x15,x8 877 add x20,x20,x24 // d+=h 878 add x24,x24,x19 // h+=Maj(a,b,c) 879 ldr x19,[x30],#8 // *K++, x28 in next round 880 add x15,x15,x5 881 add x24,x24,x17 // h+=Sigma0(a) 882 add x15,x15,x4 883 ldr x4,[sp,#8] 884 str x7,[sp,#0] 885 ror x16,x20,#14 886 add x23,x23,x19 // h+=K[i] 887 ror x6,x1,#1 888 and x17,x21,x20 889 ror x5,x14,#19 890 bic x19,x22,x20 891 ror x7,x24,#28 892 add x23,x23,x15 // h+=X[i] 893 eor x16,x16,x20,ror#18 894 eor x6,x6,x1,ror#8 895 orr x17,x17,x19 // Ch(e,f,g) 
896 eor x19,x24,x25 // a^b, b^c in next round 897 eor x16,x16,x20,ror#41 // Sigma1(e) 898 eor x7,x7,x24,ror#34 899 add x23,x23,x17 // h+=Ch(e,f,g) 900 and x28,x28,x19 // (b^c)&=(a^b) 901 eor x5,x5,x14,ror#61 902 eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) 903 add x23,x23,x16 // h+=Sigma1(e) 904 eor x28,x28,x25 // Maj(a,b,c) 905 eor x17,x7,x24,ror#39 // Sigma0(a) 906 eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) 907 add x0,x0,x9 908 add x27,x27,x23 // d+=h 909 add x23,x23,x28 // h+=Maj(a,b,c) 910 ldr x28,[x30],#8 // *K++, x19 in next round 911 add x0,x0,x6 912 add x23,x23,x17 // h+=Sigma0(a) 913 add x0,x0,x5 914 ldr x5,[sp,#16] 915 str x8,[sp,#8] 916 ror x16,x27,#14 917 add x22,x22,x28 // h+=K[i] 918 ror x7,x2,#1 919 and x17,x20,x27 920 ror x6,x15,#19 921 bic x28,x21,x27 922 ror x8,x23,#28 923 add x22,x22,x0 // h+=X[i] 924 eor x16,x16,x27,ror#18 925 eor x7,x7,x2,ror#8 926 orr x17,x17,x28 // Ch(e,f,g) 927 eor x28,x23,x24 // a^b, b^c in next round 928 eor x16,x16,x27,ror#41 // Sigma1(e) 929 eor x8,x8,x23,ror#34 930 add x22,x22,x17 // h+=Ch(e,f,g) 931 and x19,x19,x28 // (b^c)&=(a^b) 932 eor x6,x6,x15,ror#61 933 eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) 934 add x22,x22,x16 // h+=Sigma1(e) 935 eor x19,x19,x24 // Maj(a,b,c) 936 eor x17,x8,x23,ror#39 // Sigma0(a) 937 eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) 938 add x1,x1,x10 939 add x26,x26,x22 // d+=h 940 add x22,x22,x19 // h+=Maj(a,b,c) 941 ldr x19,[x30],#8 // *K++, x28 in next round 942 add x1,x1,x7 943 add x22,x22,x17 // h+=Sigma0(a) 944 add x1,x1,x6 945 ldr x6,[sp,#24] 946 str x9,[sp,#16] 947 ror x16,x26,#14 948 add x21,x21,x19 // h+=K[i] 949 ror x8,x3,#1 950 and x17,x27,x26 951 ror x7,x0,#19 952 bic x19,x20,x26 953 ror x9,x22,#28 954 add x21,x21,x1 // h+=X[i] 955 eor x16,x16,x26,ror#18 956 eor x8,x8,x3,ror#8 957 orr x17,x17,x19 // Ch(e,f,g) 958 eor x19,x22,x23 // a^b, b^c in next round 959 eor x16,x16,x26,ror#41 // Sigma1(e) 960 eor x9,x9,x22,ror#34 961 add x21,x21,x17 // h+=Ch(e,f,g) 962 and x28,x28,x19 // (b^c)&=(a^b) 963 eor 
x7,x7,x0,ror#61 964 eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) 965 add x21,x21,x16 // h+=Sigma1(e) 966 eor x28,x28,x23 // Maj(a,b,c) 967 eor x17,x9,x22,ror#39 // Sigma0(a) 968 eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) 969 add x2,x2,x11 970 add x25,x25,x21 // d+=h 971 add x21,x21,x28 // h+=Maj(a,b,c) 972 ldr x28,[x30],#8 // *K++, x19 in next round 973 add x2,x2,x8 974 add x21,x21,x17 // h+=Sigma0(a) 975 add x2,x2,x7 976 ldr x7,[sp,#0] 977 str x10,[sp,#24] 978 ror x16,x25,#14 979 add x20,x20,x28 // h+=K[i] 980 ror x9,x4,#1 981 and x17,x26,x25 982 ror x8,x1,#19 983 bic x28,x27,x25 984 ror x10,x21,#28 985 add x20,x20,x2 // h+=X[i] 986 eor x16,x16,x25,ror#18 987 eor x9,x9,x4,ror#8 988 orr x17,x17,x28 // Ch(e,f,g) 989 eor x28,x21,x22 // a^b, b^c in next round 990 eor x16,x16,x25,ror#41 // Sigma1(e) 991 eor x10,x10,x21,ror#34 992 add x20,x20,x17 // h+=Ch(e,f,g) 993 and x19,x19,x28 // (b^c)&=(a^b) 994 eor x8,x8,x1,ror#61 995 eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) 996 add x20,x20,x16 // h+=Sigma1(e) 997 eor x19,x19,x22 // Maj(a,b,c) 998 eor x17,x10,x21,ror#39 // Sigma0(a) 999 eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) 1000 add x3,x3,x12 1001 add x24,x24,x20 // d+=h 1002 add x20,x20,x19 // h+=Maj(a,b,c) 1003 ldr x19,[x30],#8 // *K++, x28 in next round 1004 add x3,x3,x9 1005 add x20,x20,x17 // h+=Sigma0(a) 1006 add x3,x3,x8 1007 cbnz x19,.Loop_16_xx 1008 1009 ldp x0,x2,[x29,#96] 1010 ldr x1,[x29,#112] 1011 sub x30,x30,#648 // rewind 1012 1013 ldp x3,x4,[x0] 1014 ldp x5,x6,[x0,#2*8] 1015 add x1,x1,#14*8 // advance input pointer 1016 ldp x7,x8,[x0,#4*8] 1017 add x20,x20,x3 1018 ldp x9,x10,[x0,#6*8] 1019 add x21,x21,x4 1020 add x22,x22,x5 1021 add x23,x23,x6 1022 stp x20,x21,[x0] 1023 add x24,x24,x7 1024 add x25,x25,x8 1025 stp x22,x23,[x0,#2*8] 1026 add x26,x26,x9 1027 add x27,x27,x10 1028 cmp x1,x2 1029 stp x24,x25,[x0,#4*8] 1030 stp x26,x27,[x0,#6*8] 1031 b.ne .Loop 1032 1033 ldp x19,x20,[x29,#16] 1034 add sp,sp,#4*8 1035 ldp x21,x22,[x29,#32] 1036 ldp x23,x24,[x29,#48] 1037 ldp 
x25,x26,[x29,#64] 1038 ldp x27,x28,[x29,#80] 1039 ldp x29,x30,[sp],#128 1040.inst 0xd50323bf // autiasp 1041 ret 1042.size sha512_block_data_order,.-sha512_block_data_order 1043 1044.align 6 1045.type .LK512,%object 1046.LK512: 1047.quad 0x428a2f98d728ae22,0x7137449123ef65cd 1048.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc 1049.quad 0x3956c25bf348b538,0x59f111f1b605d019 1050.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 1051.quad 0xd807aa98a3030242,0x12835b0145706fbe 1052.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 1053.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 1054.quad 0x9bdc06a725c71235,0xc19bf174cf692694 1055.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 1056.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 1057.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 1058.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 1059.quad 0x983e5152ee66dfab,0xa831c66d2db43210 1060.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 1061.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 1062.quad 0x06ca6351e003826f,0x142929670a0e6e70 1063.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 1064.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df 1065.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 1066.quad 0x81c2c92e47edaee6,0x92722c851482353b 1067.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 1068.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 1069.quad 0xd192e819d6ef5218,0xd69906245565a910 1070.quad 0xf40e35855771202a,0x106aa07032bbd1b8 1071.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 1072.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 1073.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb 1074.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 1075.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 1076.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec 1077.quad 0x90befffa23631e28,0xa4506cebde82bde9 1078.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b 1079.quad 0xca273eceea26619c,0xd186b8c721c0c207 1080.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 1081.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 1082.quad 0x113f9804bef90dae,0x1b710b35131c471b 1083.quad 
0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// zero word closes the table (81st entry, after the 80 constants)
.size	.LK512,.-.LK512
#ifndef __KERNEL__
// PC-relative offset from this slot to OPENSSL_armcap_P. The entry code of
// sha512_block_data_order loads the offset and adds the address of the slot
// to locate the capability word.
.align	3
.LOPENSSL_armcap_P:
# ifdef	__ILP32__
.long	OPENSSL_armcap_P-.
# else
.quad	OPENSSL_armcap_P-.
# endif
#endif
// NUL-terminated ident string:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#ifndef __KERNEL__
//----------------------------------------------------------------------
// sha512_block_armv8 / .Lv8_entry
//
// SHA-512 block function built on the ARMv8.2 SHA-512 instructions, which
// are emitted as raw .inst words with the mnemonic given in the comment.
// sha512_block_data_order branches to .Lv8_entry when the ARMV8_SHA512
// capability bit is set.
//
// In:    x0 = hash context, 4 x 128-bit vectors, read on entry and
//             written back on exit
//        x1 = input data
//        x2 = number of 128-byte blocks. The counter is decremented
//             before it is tested, so it must be non-zero on entry.
// Uses:  x3 = cursor into .LK512 (rewound by 80*8 at the end of a block)
//        x4 = x1 - 128, source for the conditional rewind of x1
//        v0-v4   rotating working state (v0-v3 hold it at loop edges)
//        v5-v7   operand staging for the SHA-512 instructions
//        v16-v23 message schedule, 16 x 64-bit words
//        v24,v25 K512 pairs, used alternately
//        v26-v29 copy of the state taken at the top of each block
// Stack: 16 bytes. x29 and x30 are stored, but only x29 is reloaded;
//        x30 is never written in this function.
// Flags: modified by subs.
//
// One trip through .Loop_hw handles one block in 40 steps; every step
// consumes one 16-byte pair of K512 constants. Steps 0-31 also update
// the message schedule in place (sha512su0/sha512su1). Steps 32-39
// instead fetch and byte-swap the input for the following trip.
//----------------------------------------------------------------------
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	stp	x29,x30,[sp,#-16]!
	mov	x29,sp

	// first block: 8 x 16 bytes into the schedule registers
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	// current hash state
	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]
	adr	x3,.LK512

	// reverse the bytes inside every 64-bit lane of the input
	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16	// K pair for step 0
	subs	x2,x2,#1		// one block fewer; flags feed the csel below
	sub	x4,x1,#128		// x4 = start of the block loaded last
	mov	v26.16b,v0.16b		// keep the incoming state for the final add
	mov	v27.16b,v1.16b
	mov	v28.16b,v2.16b
	mov	v29.16b,v3.16b
	// Last block (x2 is now zero): step x1 back by 128 so that the
	// look-ahead loads in steps 32-39 re-read the block just loaded
	// instead of moving on by another 128 bytes.
	csel	x1,x1,x4,ne

	// ---- step 0: W=v16, K=v24 ----
	add	v24.2d,v24.2d,v16.2d	// K + W
	ld1	{v25.2d},[x3],#16	// prefetch K pair for the next step
	ext	v24.16b,v24.16b,v24.16b,#8	// swap the two 64-bit halves
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
.inst	0xcec08230	// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678af0	// sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 1: W=v17, K=v25 ----
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
.inst	0xcec08251	// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a11	// sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 2: W=v18, K=v24 ----
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
.inst	0xcec08272	// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a32	// sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 3: W=v19, K=v25 ----
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
.inst	0xcec08293	// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a53	// sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 4: W=v20, K=v24 ----
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
.inst	0xcec082b4	// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a74	// sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 5: W=v21, K=v25 ----
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
.inst	0xcec082d5	// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a95	// sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 6: W=v22, K=v24 ----
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
.inst	0xcec082f6	// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678ab6	// sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 7: W=v23, K=v25 ----
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
.inst	0xcec08217	// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678ad7	// sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 8: W=v16, K=v24 ----
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
.inst	0xcec08230	// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678af0	// sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 9: W=v17, K=v25 ----
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
.inst	0xcec08251	// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a11	// sha512su1 v17.16b,v16.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 10: W=v18, K=v24 ----
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
.inst	0xcec08272	// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a32	// sha512su1 v18.16b,v17.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 11: W=v19, K=v25 ----
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
.inst	0xcec08293	// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a53	// sha512su1 v19.16b,v18.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 12: W=v20, K=v24 ----
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
.inst	0xcec082b4	// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a74	// sha512su1 v20.16b,v19.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 13: W=v21, K=v25 ----
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
.inst	0xcec082d5	// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a95	// sha512su1 v21.16b,v20.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 14: W=v22, K=v24 ----
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
.inst	0xcec082f6	// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678ab6	// sha512su1 v22.16b,v21.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 15: W=v23, K=v25 ----
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
.inst	0xcec08217	// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678ad7	// sha512su1 v23.16b,v22.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 16: W=v16, K=v24 ----
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
.inst	0xcec08230	// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678af0	// sha512su1 v16.16b,v23.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 17: W=v17, K=v25 ----
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
.inst	0xcec08251	// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a11	// sha512su1 v17.16b,v16.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 18: W=v18, K=v24 ----
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
.inst	0xcec08272	// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a32	// sha512su1 v18.16b,v17.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 19: W=v19, K=v25 ----
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
.inst	0xcec08293	// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a53	// sha512su1 v19.16b,v18.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 20: W=v20, K=v24 ----
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
.inst	0xcec082b4	// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a74	// sha512su1 v20.16b,v19.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 21: W=v21, K=v25 ----
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
.inst	0xcec082d5	// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a95	// sha512su1 v21.16b,v20.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 22: W=v22, K=v24 ----
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
.inst	0xcec082f6	// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678ab6	// sha512su1 v22.16b,v21.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 23: W=v23, K=v25 ----
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
.inst	0xcec08217	// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678ad7	// sha512su1 v23.16b,v22.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 24: W=v16, K=v24 ----
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
.inst	0xcec08230	// sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678af0	// sha512su1 v16.16b,v23.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 25: W=v17, K=v25 ----
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
.inst	0xcec08251	// sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678a11	// sha512su1 v17.16b,v16.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 26: W=v18, K=v24 ----
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
.inst	0xcec08272	// sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a32	// sha512su1 v18.16b,v17.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 27: W=v19, K=v25 ----
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
.inst	0xcec08293	// sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a53	// sha512su1 v19.16b,v18.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 28: W=v20, K=v24 ----
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
.inst	0xcec082b4	// sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a74	// sha512su1 v20.16b,v19.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 29: W=v21, K=v25 ----
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
.inst	0xcec082d5	// sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a95	// sha512su1 v21.16b,v20.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 30: W=v22, K=v24 ----
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d
.inst	0xcec082f6	// sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678ab6	// sha512su1 v22.16b,v21.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 31: W=v23, K=v25 (last schedule update) ----
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d
.inst	0xcec08217	// sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678ad7	// sha512su1 v23.16b,v22.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// Steps 32-39: no schedule update. Once a W register has been
	// added into K it is reloaded with input for the following trip.

	// ---- step 32: W=v16, K=v24 ----
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v16.16b},[x1],#16	// input for the following trip
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
	rev64	v16.16b,v16.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 33: W=v17, K=v25 ----
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v17.2d
	ld1	{v17.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
	rev64	v17.16b,v17.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 34: W=v18, K=v24 ----
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v18.2d
	ld1	{v18.16b},[x1],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
	rev64	v18.16b,v18.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// ---- step 35: W=v19, K=v25 ----
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v19.2d
	ld1	{v19.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d
.inst	0xce6680a3	// sha512h v3.16b,v5.16b,v6.16b
	rev64	v19.16b,v19.16b
	add	v4.2d,v1.2d,v3.2d
.inst	0xce608423	// sha512h2 v3.16b,v1.16b,v0.16b

	// ---- step 36: W=v20, K=v24 ----
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v20.2d
	ld1	{v20.16b},[x1],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d
.inst	0xce6680a2	// sha512h v2.16b,v5.16b,v6.16b
	rev64	v20.16b,v20.16b
	add	v1.2d,v0.2d,v2.2d
.inst	0xce638402	// sha512h2 v2.16b,v0.16b,v3.16b

	// ---- step 37: W=v21, K=v25 ----
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v21.2d
	ld1	{v21.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d
.inst	0xce6680a4	// sha512h v4.16b,v5.16b,v6.16b
	rev64	v21.16b,v21.16b
	add	v0.2d,v3.2d,v4.2d
.inst	0xce628464	// sha512h2 v4.16b,v3.16b,v2.16b

	// ---- step 38: W=v22, K=v24 ----
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v22.2d
	ld1	{v22.16b},[x1],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d
.inst	0xce6680a1	// sha512h v1.16b,v5.16b,v6.16b
	rev64	v22.16b,v22.16b
	add	v3.2d,v2.2d,v1.2d
.inst	0xce648441	// sha512h2 v1.16b,v2.16b,v4.16b

	// ---- step 39: W=v23, K=v25 (all 80 constants consumed) ----
	sub	x3,x3,#80*8		// point x3 back at the start of .LK512
	add	v25.2d,v25.2d,v23.2d
	ld1	{v23.16b},[x1],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d
.inst	0xce6680a0	// sha512h v0.16b,v5.16b,v6.16b
	rev64	v23.16b,v23.16b
	add	v2.2d,v4.2d,v0.2d
.inst	0xce618480	// sha512h2 v0.16b,v4.16b,v1.16b

	// fold the state saved at the top of the block into the result
	add	v0.2d,v0.2d,v26.2d
	add	v1.2d,v1.2d,v27.2d
	add	v2.2d,v2.2d,v28.2d
	add	v3.2d,v3.2d,v29.2d

	cbnz	x2,.Loop_hw		// more blocks to process

	st1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]	// write the context back

	ldr	x29,[sp],#16		// pop the frame; x30 is still intact
	ret
.size	sha512_block_armv8,.-sha512_block_armv8
#endif