1/* Do not modify. This file is auto-generated from sha512-armv8.pl. */ 2// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. 3// 4// Licensed under the Apache License 2.0 (the "License"). You may not use 5// this file except in compliance with the License. You can obtain a copy 6// in the file LICENSE in the source distribution or at 7// https://www.openssl.org/source/license.html 8 9// ==================================================================== 10// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 11// project. The module is, however, dual licensed under OpenSSL and 12// CRYPTOGAMS licenses depending on where you obtain it. For further 13// details see http://www.openssl.org/~appro/cryptogams/. 14// 15// Permission to use under GPLv2 terms is granted. 16// ==================================================================== 17// 18// SHA256/512 for ARMv8. 19// 20// Performance in cycles per processed byte and improvement coefficient 21// over code generated with "default" compiler: 22// 23// SHA256-hw SHA256(*) SHA512 24// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) 25// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) 26// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) 27// Denver 2.01 10.5 (+26%) 6.70 (+8%) 28// X-Gene 20.0 (+100%) 12.8 (+300%(***)) 29// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) 30// Kryo 1.92 17.4 (+30%) 11.2 (+8%) 31// ThunderX2 2.54 13.2 (+40%) 8.40 (+18%) 32// 33// (*) Software SHA256 results are of lesser relevance, presented 34// mostly for informational purposes. 35// (**) The result is a trade-off: it's possible to improve it by 36// 10% (or by 1 cycle per round), but at the cost of 20% loss 37// on Cortex-A53 (or by 4 cycles per round). 38// (***) Super-impressive coefficients over gcc-generated code are 39// indication of some compiler "pathology", most notably code 40// generated with -mgeneral-regs-only is significantly faster 41// and the gap is only 40-90%. 42// 43// October 2016. 
//
// Originally it was reckoned that it makes no sense to implement NEON
// version of SHA256 for 64-bit processors. This is because performance
// improvement on most wide-spread Cortex-A5x processors was observed
// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
// observed that 32-bit NEON SHA256 performs significantly better than
// 64-bit scalar version on *some* of the more recent processors. As
// result 64-bit NEON version of SHA256 was added to provide best
// all-round performance. For example it executes ~30% faster on X-Gene
// and Mongoose. [For reference, NEON version of SHA512 is bound to
// deliver much less improvement, likely *negative* on Cortex-A5x.
// Which is why NEON support is limited to SHA256.]

// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#ifndef	__KERNEL__
# include "arm_arch.h"

.hidden	OPENSSL_armcap_P
#endif

.text

// ---------------------------------------------------------------------
// sha512_block_data_order -- SHA-512 block function, scalar code path.
//
// In (as read by the code below):
//   x0  pointer to eight 64-bit state words (loaded into x20-x27)
//   x1  pointer to the input data
//   x2  number of 128-byte blocks (end of input = x1 + (x2 << 7))
//
// The block count is not checked for zero: the loop body runs once
// before the input pointer is first compared with the end pointer.
//
// Unless built with __KERNEL__, the entry first tests ARMV8_SHA512 in
// OPENSSL_armcap_P and, when the bit is set, branches to .Lv8_entry
// (the sha512_block_armv8 path further down).
//
// Frame (128 bytes, addressed through x29):
//   [x29,#0]       saved x29,x30
//   [x29,#16..#95] saved x19-x28
//   [x29,#96]      x0 (state pointer)
//   [x29,#104]     end-of-input pointer
//   [x29,#112]     input pointer, stored after the first 16 bytes of
//                  the current block have been consumed
// Below the frame, four 8-byte slots at [sp,#0]..[sp,#24] park message
// schedule words while their registers are borrowed as temporaries.
//
// Register use inside the loops:
//   x20-x27  working variables a..h; the roles rotate by one register
//            per round instead of the values being moved
//   x30      pointer into .LK512
//   x19,x28  alternate between the current K[i] and the a^b term that
//            is carried into the next round's Maj()
//   x16,x17  temporaries
//   x3-x15,x0,x1,x2  message words X[0]..X[15]; x0-x2 are overwritten,
//            which is why the arguments are stashed in the frame
// ---------------------------------------------------------------------
.globl	sha512_block_data_order
.type	sha512_block_data_order,%function
.align	6
sha512_block_data_order:
#ifndef	__KERNEL__
	adrp	x16,OPENSSL_armcap_P
	ldr	w16,[x16,#:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	.Lv8_entry
#endif
.inst	0xd503233f				// paciasp
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8			// four 8-byte spill slots for X[] words

	ldp	x20,x21,[x0]				// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]			// x0-x2 are reused for X[13..15] below

// One pass of .Loop processes one 128-byte block. Rounds 0..15 are
// unrolled here and consume the input words directly (byte-reversed
// unless __AARCH64EB__ is defined).
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8			// *K++
	eor	x28,x21,x22				// magic seed
	str	x1,[x29,#112]			// input pointer, already 16 bytes into the block
#ifndef	__AARCH64EB__
	rev	x3,x3			// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19			// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x20,x21			// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17			// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x23,x23,x27			// d+=h
	eor	x28,x28,x21			// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x27,x27,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x4,x4			// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17			// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28			// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x27,x20			// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17			// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x22,x22,x26			// d+=h
	eor	x19,x19,x20			// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x26,x26,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x5,x5			// 2
#endif
	add	x26,x26,x17			// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19			// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x26,x27			// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17			// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x21,x21,x25			// d+=h
	eor	x28,x28,x27			// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x25,x25,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x6,x6			// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17			// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28			// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x25,x26			// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17			// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x20,x20,x24			// d+=h
	eor	x19,x19,x26			// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x24,x24,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x7,x7			// 4
#endif
	add	x24,x24,x17			// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19			// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x24,x25			// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17			// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x27,x27,x23			// d+=h
	eor	x28,x28,x25			// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x23,x23,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x8,x8			// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17			// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28			// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x23,x24			// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17			// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x26,x26,x22			// d+=h
	eor	x19,x19,x24			// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x22,x22,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x9,x9			// 6
#endif
	add	x22,x22,x17			// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19			// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x22,x23			// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17			// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x25,x25,x21			// d+=h
	eor	x28,x28,x23			// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x21,x21,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x10,x10			// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17			// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28			// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x21,x22			// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17			// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x24,x24,x20			// d+=h
	eor	x19,x19,x22			// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x20,x20,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x11,x11			// 8
#endif
	add	x20,x20,x17			// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19			// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x20,x21			// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17			// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x23,x23,x27			// d+=h
	eor	x28,x28,x21			// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x27,x27,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x12,x12			// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17			// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28			// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x27,x20			// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17			// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x22,x22,x26			// d+=h
	eor	x19,x19,x20			// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x26,x26,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x13,x13			// 10
#endif
	add	x26,x26,x17			// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19			// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x26,x27			// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17			// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x21,x21,x25			// d+=h
	eor	x28,x28,x27			// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x25,x25,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x14,x14			// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17			// h+=Sigma0(a)
	str	x6,[sp,#24]			// park X[3]; x6 is the temporary of this round
	ror	x16,x21,#14
	add	x24,x24,x28			// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x25,x26			// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17			// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x20,x20,x24			// d+=h
	eor	x19,x19,x26			// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x24,x24,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x15,x15			// 12
#endif
	add	x24,x24,x17			// h+=Sigma0(a)
	str	x7,[sp,#0]			// park X[4]
	ror	x16,x20,#14
	add	x23,x23,x19			// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x24,x25			// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17			// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x27,x27,x23			// d+=h
	eor	x28,x28,x25			// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x23,x23,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x0,x0			// 13
#endif
	ldp	x1,x2,[x1]			// last two input words; overwrites x1 and x2
	add	x23,x23,x17			// h+=Sigma0(a)
	str	x8,[sp,#8]			// park X[5]
	ror	x16,x27,#14
	add	x22,x22,x28			// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0			// h+=X[i]
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x23,x24			// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17			// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16			// h+=Sigma1(e)
	and	x19,x19,x28			// (b^c)&=(a^b)
	add	x26,x26,x22			// d+=h
	eor	x19,x19,x24			// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	//add	x22,x22,x17			// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x1,x1			// 14
#endif
	ldr	x6,[sp,#24]			// reload X[3]
	add	x22,x22,x17			// h+=Sigma0(a)
	str	x9,[sp,#16]			// park X[6]
	ror	x16,x26,#14
	add	x21,x21,x19			// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1			// h+=X[i]
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x22,x23			// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17			// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16			// h+=Sigma1(e)
	and	x28,x28,x19			// (b^c)&=(a^b)
	add	x25,x25,x21			// d+=h
	eor	x28,x28,x23			// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	//add	x21,x21,x17			// h+=Sigma0(a)
	// Round 15 is the first one that also expands the message schedule:
	// x3 (X[0]) becomes X[0] + X[9] + sigma0(X[1]) + sigma1(X[14]).
#ifndef	__AARCH64EB__
	rev	x2,x2			// 15
#endif
	ldr	x7,[sp,#0]			// reload X[4]
	add	x21,x21,x17			// h+=Sigma0(a)
	str	x10,[sp,#24]			// park X[7]
	ror	x16,x25,#14
	add	x20,x20,x28			// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2			// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x21,x22			// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16			// h+=Sigma1(e)
	eor	x19,x19,x22			// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20			// d+=h
	add	x20,x20,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17			// h+=Sigma0(a)
	add	x3,x3,x8
// Rounds 16..79, sixteen per pass. Each round also computes the
// schedule word consumed by the following round. The pass ends by
// loading the next K word into x19; the zero terminator of .LK512
// makes the cbnz at the bottom fall through.
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19			// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3			// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x20,x21			// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16			// h+=Sigma1(e)
	eor	x28,x28,x21			// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27			// d+=h
	add	x27,x27,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17			// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28			// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4			// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x27,x20			// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16			// h+=Sigma1(e)
	eor	x19,x19,x20			// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26			// d+=h
	add	x26,x26,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17			// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19			// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5			// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x26,x27			// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16			// h+=Sigma1(e)
	eor	x28,x28,x27			// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25			// d+=h
	add	x25,x25,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17			// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28			// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6			// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x25,x26			// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16			// h+=Sigma1(e)
	eor	x19,x19,x26			// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24			// d+=h
	add	x24,x24,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17			// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19			// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7			// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x24,x25			// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16			// h+=Sigma1(e)
	eor	x28,x28,x25			// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23			// d+=h
	add	x23,x23,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17			// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28			// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8			// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x23,x24			// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16			// h+=Sigma1(e)
	eor	x19,x19,x24			// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22			// d+=h
	add	x22,x22,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17			// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19			// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9			// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x22,x23			// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16			// h+=Sigma1(e)
	eor	x28,x28,x23			// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21			// d+=h
	add	x21,x21,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17			// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28			// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10			// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x21,x22			// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16			// h+=Sigma1(e)
	eor	x19,x19,x22			// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20			// d+=h
	add	x20,x20,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17			// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19			// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11			// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x20,x21			// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16			// h+=Sigma1(e)
	eor	x28,x28,x21			// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27			// d+=h
	add	x27,x27,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17			// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28			// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12			// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x27,x20			// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16			// h+=Sigma1(e)
	eor	x19,x19,x20			// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26			// d+=h
	add	x26,x26,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17			// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19			// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13			// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x26,x27			// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16			// h+=Sigma1(e)
	eor	x28,x28,x27			// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25			// d+=h
	add	x25,x25,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17			// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28			// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14			// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x25,x26			// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16			// h+=Sigma1(e)
	eor	x19,x19,x26			// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24			// d+=h
	add	x24,x24,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17			// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19			// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15			// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x24,x25			// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16			// h+=Sigma1(e)
	eor	x28,x28,x25			// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23			// d+=h
	add	x23,x23,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17			// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28			// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0			// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x23,x24			// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16			// h+=Sigma1(e)
	eor	x19,x19,x24			// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22			// d+=h
	add	x22,x22,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17			// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19			// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1			// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19			// Ch(e,f,g)
	eor	x19,x22,x23			// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17			// h+=Ch(e,f,g)
	and	x28,x28,x19			// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16			// h+=Sigma1(e)
	eor	x28,x28,x23			// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21			// d+=h
	add	x21,x21,x28			// h+=Maj(a,b,c)
	ldr	x28,[x30],#8		// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17			// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28			// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2			// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28			// Ch(e,f,g)
	eor	x28,x21,x22			// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17			// h+=Ch(e,f,g)
	and	x19,x19,x28			// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16			// h+=Sigma1(e)
	eor	x19,x19,x22			// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20			// d+=h
	add	x20,x20,x19			// h+=Maj(a,b,c)
	ldr	x19,[x30],#8		// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17			// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx			// x19 == 0 only for the table terminator

	// Block done: add the working variables back into the stored state.
	ldp	x0,x2,[x29,#96]			// state pointer, end of input
	ldr	x1,[x29,#112]
	// 648 = 81*8: eighty constants plus the terminator were read.
	sub	x30,x30,#648		// rewind

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	// The saved pointer was already 2*8 bytes into the block, so adding
	// 14*8 lands on the start of the next 128-byte block.
	add	x1,x1,#14*8			// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8			// drop the spill slots
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
.inst	0xd50323bf				// autiasp
	ret
.size	sha512_block_data_order,.-sha512_block_data_order

// .LK512: the eighty 64-bit SHA-512 round constants in round order,
// followed by a zero quad. The scalar loop above uses that zero as its
// end-of-rounds marker and rewinds the table pointer by 81*8 bytes.
.align	6
.type	.LK512,%object
.LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
// NUL-terminated ASCII tag:
// "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#ifndef	__KERNEL__
// sha512_block_armv8 -- code path reached through .Lv8_entry when
// sha512_block_data_order finds ARMV8_SHA512 set in OPENSSL_armcap_P.
// The SHA-512 instructions are emitted as .inst words; the mnemonic
// each word stands for is given in the comment next to it.
//   x0  state pointer (loaded into v0-v3)
//   x1  input pointer (128 bytes are loaded into v16-v23 on entry)
//   x2  block counter, decremented at the top of .Loop_hw
//   x3  pointer into .LK512
//   v26-v29  copy of v0-v3 taken at the top of each .Loop_hw pass
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// load context
	adr	x3,.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b			// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	// When the counter has just reached zero, x1 is stepped back by 128.
	csel	x1,x1,x4,ne			// conditional rewind
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
1187 ext v6.16b,v1.16b,v2.16b,#8 1188 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1189.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1190 ext v7.16b,v17.16b,v18.16b,#8 1191.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1192.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1193 add v4.2d,v1.2d,v3.2d // "D + T1" 1194.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1195 add v24.2d,v24.2d,v22.2d 1196 ld1 {v25.2d},[x3],#16 1197 ext v24.16b,v24.16b,v24.16b,#8 1198 ext v5.16b,v4.16b,v2.16b,#8 1199 ext v6.16b,v0.16b,v4.16b,#8 1200 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1201.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1202 ext v7.16b,v18.16b,v19.16b,#8 1203.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1204.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1205 add v1.2d,v0.2d,v2.2d // "D + T1" 1206.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1207 add v25.2d,v25.2d,v23.2d 1208 ld1 {v24.2d},[x3],#16 1209 ext v25.16b,v25.16b,v25.16b,#8 1210 ext v5.16b,v1.16b,v4.16b,#8 1211 ext v6.16b,v3.16b,v1.16b,#8 1212 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1213.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1214 ext v7.16b,v19.16b,v20.16b,#8 1215.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1216.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1217 add v0.2d,v3.2d,v4.2d // "D + T1" 1218.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1219 add v24.2d,v24.2d,v16.2d 1220 ld1 {v25.2d},[x3],#16 1221 ext v24.16b,v24.16b,v24.16b,#8 1222 ext v5.16b,v0.16b,v1.16b,#8 1223 ext v6.16b,v2.16b,v0.16b,#8 1224 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1225.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1226 ext v7.16b,v20.16b,v21.16b,#8 1227.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1228.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1229 add v3.2d,v2.2d,v1.2d // "D + T1" 1230.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1231 add v25.2d,v25.2d,v17.2d 1232 ld1 {v24.2d},[x3],#16 1233 ext v25.16b,v25.16b,v25.16b,#8 1234 ext v5.16b,v3.16b,v0.16b,#8 1235 ext 
v6.16b,v4.16b,v3.16b,#8 1236 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1237.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1238 ext v7.16b,v21.16b,v22.16b,#8 1239.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1240.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1241 add v2.2d,v4.2d,v0.2d // "D + T1" 1242.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1243 add v24.2d,v24.2d,v18.2d 1244 ld1 {v25.2d},[x3],#16 1245 ext v24.16b,v24.16b,v24.16b,#8 1246 ext v5.16b,v2.16b,v3.16b,#8 1247 ext v6.16b,v1.16b,v2.16b,#8 1248 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1249.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1250 ext v7.16b,v22.16b,v23.16b,#8 1251.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1252.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1253 add v4.2d,v1.2d,v3.2d // "D + T1" 1254.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1255 add v25.2d,v25.2d,v19.2d 1256 ld1 {v24.2d},[x3],#16 1257 ext v25.16b,v25.16b,v25.16b,#8 1258 ext v5.16b,v4.16b,v2.16b,#8 1259 ext v6.16b,v0.16b,v4.16b,#8 1260 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1261.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1262 ext v7.16b,v23.16b,v16.16b,#8 1263.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1264.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1265 add v1.2d,v0.2d,v2.2d // "D + T1" 1266.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1267 add v24.2d,v24.2d,v20.2d 1268 ld1 {v25.2d},[x3],#16 1269 ext v24.16b,v24.16b,v24.16b,#8 1270 ext v5.16b,v1.16b,v4.16b,#8 1271 ext v6.16b,v3.16b,v1.16b,#8 1272 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1273.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1274 ext v7.16b,v16.16b,v17.16b,#8 1275.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1276.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1277 add v0.2d,v3.2d,v4.2d // "D + T1" 1278.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1279 add v25.2d,v25.2d,v21.2d 1280 ld1 {v24.2d},[x3],#16 1281 ext v25.16b,v25.16b,v25.16b,#8 1282 ext v5.16b,v0.16b,v1.16b,#8 1283 ext v6.16b,v2.16b,v0.16b,#8 
1284 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1285.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1286 ext v7.16b,v17.16b,v18.16b,#8 1287.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1288.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1289 add v3.2d,v2.2d,v1.2d // "D + T1" 1290.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1291 add v24.2d,v24.2d,v22.2d 1292 ld1 {v25.2d},[x3],#16 1293 ext v24.16b,v24.16b,v24.16b,#8 1294 ext v5.16b,v3.16b,v0.16b,#8 1295 ext v6.16b,v4.16b,v3.16b,#8 1296 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1297.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1298 ext v7.16b,v18.16b,v19.16b,#8 1299.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1300.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1301 add v2.2d,v4.2d,v0.2d // "D + T1" 1302.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1303 add v25.2d,v25.2d,v23.2d 1304 ld1 {v24.2d},[x3],#16 1305 ext v25.16b,v25.16b,v25.16b,#8 1306 ext v5.16b,v2.16b,v3.16b,#8 1307 ext v6.16b,v1.16b,v2.16b,#8 1308 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1309.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1310 ext v7.16b,v19.16b,v20.16b,#8 1311.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1312.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1313 add v4.2d,v1.2d,v3.2d // "D + T1" 1314.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1315 add v24.2d,v24.2d,v16.2d 1316 ld1 {v25.2d},[x3],#16 1317 ext v24.16b,v24.16b,v24.16b,#8 1318 ext v5.16b,v4.16b,v2.16b,#8 1319 ext v6.16b,v0.16b,v4.16b,#8 1320 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1321.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1322 ext v7.16b,v20.16b,v21.16b,#8 1323.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1324.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1325 add v1.2d,v0.2d,v2.2d // "D + T1" 1326.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1327 add v25.2d,v25.2d,v17.2d 1328 ld1 {v24.2d},[x3],#16 1329 ext v25.16b,v25.16b,v25.16b,#8 1330 ext v5.16b,v1.16b,v4.16b,#8 1331 ext v6.16b,v3.16b,v1.16b,#8 1332 add v4.2d,v4.2d,v25.2d 
// "T1 + H + K512[i]" 1333.inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1334 ext v7.16b,v21.16b,v22.16b,#8 1335.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1336.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1337 add v0.2d,v3.2d,v4.2d // "D + T1" 1338.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1339 add v24.2d,v24.2d,v18.2d 1340 ld1 {v25.2d},[x3],#16 1341 ext v24.16b,v24.16b,v24.16b,#8 1342 ext v5.16b,v0.16b,v1.16b,#8 1343 ext v6.16b,v2.16b,v0.16b,#8 1344 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1345.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1346 ext v7.16b,v22.16b,v23.16b,#8 1347.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1348.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1349 add v3.2d,v2.2d,v1.2d // "D + T1" 1350.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1351 add v25.2d,v25.2d,v19.2d 1352 ld1 {v24.2d},[x3],#16 1353 ext v25.16b,v25.16b,v25.16b,#8 1354 ext v5.16b,v3.16b,v0.16b,#8 1355 ext v6.16b,v4.16b,v3.16b,#8 1356 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1357.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1358 ext v7.16b,v23.16b,v16.16b,#8 1359.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1360.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1361 add v2.2d,v4.2d,v0.2d // "D + T1" 1362.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1363 add v24.2d,v24.2d,v20.2d 1364 ld1 {v25.2d},[x3],#16 1365 ext v24.16b,v24.16b,v24.16b,#8 1366 ext v5.16b,v2.16b,v3.16b,#8 1367 ext v6.16b,v1.16b,v2.16b,#8 1368 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1369.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1370 ext v7.16b,v16.16b,v17.16b,#8 1371.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1372.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1373 add v4.2d,v1.2d,v3.2d // "D + T1" 1374.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1375 add v25.2d,v25.2d,v21.2d 1376 ld1 {v24.2d},[x3],#16 1377 ext v25.16b,v25.16b,v25.16b,#8 1378 ext v5.16b,v4.16b,v2.16b,#8 1379 ext v6.16b,v0.16b,v4.16b,#8 1380 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 
1381.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1382 ext v7.16b,v17.16b,v18.16b,#8 1383.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1384.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1385 add v1.2d,v0.2d,v2.2d // "D + T1" 1386.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1387 add v24.2d,v24.2d,v22.2d 1388 ld1 {v25.2d},[x3],#16 1389 ext v24.16b,v24.16b,v24.16b,#8 1390 ext v5.16b,v1.16b,v4.16b,#8 1391 ext v6.16b,v3.16b,v1.16b,#8 1392 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1393.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1394 ext v7.16b,v18.16b,v19.16b,#8 1395.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1396.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1397 add v0.2d,v3.2d,v4.2d // "D + T1" 1398.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1399 add v25.2d,v25.2d,v23.2d 1400 ld1 {v24.2d},[x3],#16 1401 ext v25.16b,v25.16b,v25.16b,#8 1402 ext v5.16b,v0.16b,v1.16b,#8 1403 ext v6.16b,v2.16b,v0.16b,#8 1404 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1405.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1406 ext v7.16b,v19.16b,v20.16b,#8 1407.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1408.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1409 add v3.2d,v2.2d,v1.2d // "D + T1" 1410.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1411 add v24.2d,v24.2d,v16.2d 1412 ld1 {v25.2d},[x3],#16 1413 ext v24.16b,v24.16b,v24.16b,#8 1414 ext v5.16b,v3.16b,v0.16b,#8 1415 ext v6.16b,v4.16b,v3.16b,#8 1416 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1417.inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1418 ext v7.16b,v20.16b,v21.16b,#8 1419.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1420.inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1421 add v2.2d,v4.2d,v0.2d // "D + T1" 1422.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1423 add v25.2d,v25.2d,v17.2d 1424 ld1 {v24.2d},[x3],#16 1425 ext v25.16b,v25.16b,v25.16b,#8 1426 ext v5.16b,v2.16b,v3.16b,#8 1427 ext v6.16b,v1.16b,v2.16b,#8 1428 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1429.inst 0xcec08251 
//sha512su0 v17.16b,v18.16b 1430 ext v7.16b,v21.16b,v22.16b,#8 1431.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1432.inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1433 add v4.2d,v1.2d,v3.2d // "D + T1" 1434.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1435 add v24.2d,v24.2d,v18.2d 1436 ld1 {v25.2d},[x3],#16 1437 ext v24.16b,v24.16b,v24.16b,#8 1438 ext v5.16b,v4.16b,v2.16b,#8 1439 ext v6.16b,v0.16b,v4.16b,#8 1440 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1441.inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1442 ext v7.16b,v22.16b,v23.16b,#8 1443.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1444.inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1445 add v1.2d,v0.2d,v2.2d // "D + T1" 1446.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1447 add v25.2d,v25.2d,v19.2d 1448 ld1 {v24.2d},[x3],#16 1449 ext v25.16b,v25.16b,v25.16b,#8 1450 ext v5.16b,v1.16b,v4.16b,#8 1451 ext v6.16b,v3.16b,v1.16b,#8 1452 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1453.inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1454 ext v7.16b,v23.16b,v16.16b,#8 1455.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1456.inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1457 add v0.2d,v3.2d,v4.2d // "D + T1" 1458.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1459 add v24.2d,v24.2d,v20.2d 1460 ld1 {v25.2d},[x3],#16 1461 ext v24.16b,v24.16b,v24.16b,#8 1462 ext v5.16b,v0.16b,v1.16b,#8 1463 ext v6.16b,v2.16b,v0.16b,#8 1464 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1465.inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1466 ext v7.16b,v16.16b,v17.16b,#8 1467.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1468.inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1469 add v3.2d,v2.2d,v1.2d // "D + T1" 1470.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1471 add v25.2d,v25.2d,v21.2d 1472 ld1 {v24.2d},[x3],#16 1473 ext v25.16b,v25.16b,v25.16b,#8 1474 ext v5.16b,v3.16b,v0.16b,#8 1475 ext v6.16b,v4.16b,v3.16b,#8 1476 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1477.inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 
1478 ext v7.16b,v17.16b,v18.16b,#8 1479.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1480.inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1481 add v2.2d,v4.2d,v0.2d // "D + T1" 1482.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1483 add v24.2d,v24.2d,v22.2d 1484 ld1 {v25.2d},[x3],#16 1485 ext v24.16b,v24.16b,v24.16b,#8 1486 ext v5.16b,v2.16b,v3.16b,#8 1487 ext v6.16b,v1.16b,v2.16b,#8 1488 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1489.inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1490 ext v7.16b,v18.16b,v19.16b,#8 1491.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1492.inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1493 add v4.2d,v1.2d,v3.2d // "D + T1" 1494.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1495 add v25.2d,v25.2d,v23.2d 1496 ld1 {v24.2d},[x3],#16 1497 ext v25.16b,v25.16b,v25.16b,#8 1498 ext v5.16b,v4.16b,v2.16b,#8 1499 ext v6.16b,v0.16b,v4.16b,#8 1500 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1501.inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1502 ext v7.16b,v19.16b,v20.16b,#8 1503.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1504.inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1505 add v1.2d,v0.2d,v2.2d // "D + T1" 1506.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1507 ld1 {v25.2d},[x3],#16 1508 add v24.2d,v24.2d,v16.2d 1509 ld1 {v16.16b},[x1],#16 // load next input 1510 ext v24.16b,v24.16b,v24.16b,#8 1511 ext v5.16b,v1.16b,v4.16b,#8 1512 ext v6.16b,v3.16b,v1.16b,#8 1513 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1514.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1515 rev64 v16.16b,v16.16b 1516 add v0.2d,v3.2d,v4.2d // "D + T1" 1517.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1518 ld1 {v24.2d},[x3],#16 1519 add v25.2d,v25.2d,v17.2d 1520 ld1 {v17.16b},[x1],#16 // load next input 1521 ext v25.16b,v25.16b,v25.16b,#8 1522 ext v5.16b,v0.16b,v1.16b,#8 1523 ext v6.16b,v2.16b,v0.16b,#8 1524 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1525.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1526 rev64 v17.16b,v17.16b 1527 add 
v3.2d,v2.2d,v1.2d // "D + T1" 1528.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1529 ld1 {v25.2d},[x3],#16 1530 add v24.2d,v24.2d,v18.2d 1531 ld1 {v18.16b},[x1],#16 // load next input 1532 ext v24.16b,v24.16b,v24.16b,#8 1533 ext v5.16b,v3.16b,v0.16b,#8 1534 ext v6.16b,v4.16b,v3.16b,#8 1535 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1536.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1537 rev64 v18.16b,v18.16b 1538 add v2.2d,v4.2d,v0.2d // "D + T1" 1539.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1540 ld1 {v24.2d},[x3],#16 1541 add v25.2d,v25.2d,v19.2d 1542 ld1 {v19.16b},[x1],#16 // load next input 1543 ext v25.16b,v25.16b,v25.16b,#8 1544 ext v5.16b,v2.16b,v3.16b,#8 1545 ext v6.16b,v1.16b,v2.16b,#8 1546 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1547.inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1548 rev64 v19.16b,v19.16b 1549 add v4.2d,v1.2d,v3.2d // "D + T1" 1550.inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1551 ld1 {v25.2d},[x3],#16 1552 add v24.2d,v24.2d,v20.2d 1553 ld1 {v20.16b},[x1],#16 // load next input 1554 ext v24.16b,v24.16b,v24.16b,#8 1555 ext v5.16b,v4.16b,v2.16b,#8 1556 ext v6.16b,v0.16b,v4.16b,#8 1557 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1558.inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1559 rev64 v20.16b,v20.16b 1560 add v1.2d,v0.2d,v2.2d // "D + T1" 1561.inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1562 ld1 {v24.2d},[x3],#16 1563 add v25.2d,v25.2d,v21.2d 1564 ld1 {v21.16b},[x1],#16 // load next input 1565 ext v25.16b,v25.16b,v25.16b,#8 1566 ext v5.16b,v1.16b,v4.16b,#8 1567 ext v6.16b,v3.16b,v1.16b,#8 1568 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1569.inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1570 rev64 v21.16b,v21.16b 1571 add v0.2d,v3.2d,v4.2d // "D + T1" 1572.inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1573 ld1 {v25.2d},[x3],#16 1574 add v24.2d,v24.2d,v22.2d 1575 ld1 {v22.16b},[x1],#16 // load next input 1576 ext v24.16b,v24.16b,v24.16b,#8 1577 ext v5.16b,v0.16b,v1.16b,#8 1578 ext v6.16b,v2.16b,v0.16b,#8 
1579 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1580.inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1581 rev64 v22.16b,v22.16b 1582 add v3.2d,v2.2d,v1.2d // "D + T1" 1583.inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1584 sub x3,x3,#80*8 // rewind 1585 add v25.2d,v25.2d,v23.2d 1586 ld1 {v23.16b},[x1],#16 // load next input 1587 ext v25.16b,v25.16b,v25.16b,#8 1588 ext v5.16b,v3.16b,v0.16b,#8 1589 ext v6.16b,v4.16b,v3.16b,#8 1590 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1591.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1592 rev64 v23.16b,v23.16b 1593 add v2.2d,v4.2d,v0.2d // "D + T1" 1594.inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1595 add v0.2d,v0.2d,v26.2d // accumulate 1596 add v1.2d,v1.2d,v27.2d 1597 add v2.2d,v2.2d,v28.2d 1598 add v3.2d,v3.2d,v29.2d 1599 1600 cbnz x2,.Loop_hw 1601 1602 st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context 1603 1604 ldr x29,[sp],#16 1605 ret 1606.size sha512_block_armv8,.-sha512_block_armv8 1607#endif 1608