// SPDX-License-Identifier: Apache-2.0
/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
 */

#if defined(__aarch64__)

	/* GNU property note advertising the AArch64 feature bits of this object. */
	.section	.note.gnu.property,"a",@note
	.p2align	3
	.word	4		// n_namesz: sizeof("GNU")
	.word	16		// n_descsz
	.word	5		// n_type: NT_GNU_PROPERTY_TYPE_0
	.asciz	"GNU"
	.word	3221225472	// 0xc0000000: GNU_PROPERTY_AARCH64_FEATURE_1_AND
	.word	4		// pr_datasz
	.word	3		// FEATURE_1_BTI | FEATURE_1_PAC
	.word	0		// padding
.text

/*
 * SHA-512 round constants K[0..79] followed by a zero quad.  The scalar
 * code below walks the table with a post-incremented pointer and leaves
 * .Loop_16_xx when it loads the zero terminator.
 */
.align	6
.type	.LK512,%object
.LK512:
	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
	.quad	0x3956c25bf348b538,0x59f111f1b605d019
	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
	.quad	0xd807aa98a3030242,0x12835b0145706fbe
	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
	.quad	0x06ca6351e003826f,0x142929670a0e6e70
	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
	.quad	0x81c2c92e47edaee6,0x92722c851482353b
	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
	.quad	0xd192e819d6ef5218,0xd69906245565a910
	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
	.quad	0x90befffa23631e28,0xa4506cebde82bde9
	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
	.quad	0xca273eceea26619c,0xd186b8c721c0c207
	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
	.quad	0x113f9804bef90dae,0x1b710b35131c471b
	.quad	0x28db77f523047d84,0x32caab7b40c72493
	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
	.quad	0	// terminator
.size	.LK512,.-.LK512

/*
 * zfs_sha512_block_armv7 - SHA-512 compression function using only the
 * general-purpose registers.
 *
 * In:	x0 = pointer to the eight 64-bit state words (updated in place)
 *	x1 = input pointer
 *	x2 = number of 128-byte blocks to process; zero returns at once
 *	     without touching the state or the input
 * Presumably declared in C as
 *	void zfs_sha512_block_armv7(uint64_t state[8], const void *data,
 *	    size_t num_blks);
 * -- confirm against the C prototype.
 *
 * Clobbers x0-x17 and flags; x19-x28, x29 and x30 are saved and restored.
 *
 * Frame (x29 = sp right after the 128-byte push):
 *	[x29,#0]	caller's x29, x30
 *	[x29,#16..#95]	x19-x28
 *	[x29,#96]	state pointer (incoming x0)
 *	[x29,#104]	end-of-input pointer (x1 + (x2 << 7))
 *	[x29,#112]	input pointer of the block in flight, plus 16
 *	[sp,#0..#31]	four 8-byte slots used to spill message-schedule
 *			words whose registers double as temporaries
 *
 * Register roles inside the loop:
 *	x20-x27		working variables a..h; the roles rotate one
 *			register per round instead of moving the values
 *	x19, x28	alternately the current K[i] and the Maj() value
 *	x16, x17	Sigma1(e) and Ch()/Sigma0(a) temporaries
 *	x30		pointer into .LK512
 *	x3-x15,x0,x1,x2	message words X[0]..X[15], in that order
 *
 * Input words are byte-swapped with rev unless __AARCH64EB__ is defined.
 * The h+=Sigma0(a) addition of each round is issued near the top of the
 * following round.
 */
.globl	zfs_sha512_block_armv7
.type	zfs_sha512_block_armv7,%function
.align	6
zfs_sha512_block_armv7:
	hint	#34	// bti c
	cbz	x2,.Lv7_done	// zero blocks: .Loop would never meet its end pointer
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8	// spill area for four X[] words

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]	// x0 and x2 are reused as X[13] and X[15]

	// ---- one 128-byte block per iteration ----
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]	// x1 is reused as X[14] later on
#ifndef	__AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]	// spill X[3]; x6 serves as a temporary below
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]	// spill X[4]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]	// last two words; x1/x2 now hold X[14]/X[15]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]	// spill X[5]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]	// reload X[3]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]	// spill X[6]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x2,x2	// 15
#endif
	// From here on every round also extends the message schedule:
	// the next word gets += X[+9 words] + sigma0(..) + sigma1(..).
	ldr	x7,[sp,#0]	// reload X[4]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]	// spill X[7]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8

	// ---- rounds 16..79, sixteen per pass; exits on the K terminator ----
.Loop_16_xx:
	// i mod 16 == 0
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	// i mod 16 == 1
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	// i mod 16 == 2
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	// i mod 16 == 3
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	// i mod 16 == 4
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	// i mod 16 == 5
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	// i mod 16 == 6
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	// i mod 16 == 7
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	// i mod 16 == 8
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	// i mod 16 == 9
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	// i mod 16 == 10
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	// i mod 16 == 11
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	// i mod 16 == 12
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	// i mod 16 == 13
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	// i mod 16 == 14
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	// i mod 16 == 15
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx	// zero means the table terminator was loaded

	// ---- fold the working variables back into the stored state ----
	ldp	x0,x2,[x29,#96]	// state pointer, end of input
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind: 80 constants + terminator, 8 bytes each

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
.Lv7_done:
	ret
.size	zfs_sha512_block_armv7,.-zfs_sha512_block_armv7


.globl	zfs_sha512_block_armv8
.type	zfs_sha512_block_armv8,%function
.align	6
zfs_sha512_block_armv8:
	hint	#34	// bti c
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b-v19.16b},[x1],#64	// load input
	ld1	{v20.16b-v23.16b},[x1],#64

	ld1	{v0.2d-v3.2d},[x0]	// load context
	adr	x3,.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b	// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	csel	x1,x1,x4,ne	// conditional rewind
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1
v17.16b,v16.16b,v7.16b 1205 add v2.2d,v4.2d,v0.2d // "D + T1" 1206 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1207 add v24.2d,v24.2d,v18.2d 1208 ld1 {v25.2d},[x3],#16 1209 ext v24.16b,v24.16b,v24.16b,#8 1210 ext v5.16b,v2.16b,v3.16b,#8 1211 ext v6.16b,v1.16b,v2.16b,#8 1212 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1213 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1214 ext v7.16b,v22.16b,v23.16b,#8 1215 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1216 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1217 add v4.2d,v1.2d,v3.2d // "D + T1" 1218 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1219 add v25.2d,v25.2d,v19.2d 1220 ld1 {v24.2d},[x3],#16 1221 ext v25.16b,v25.16b,v25.16b,#8 1222 ext v5.16b,v4.16b,v2.16b,#8 1223 ext v6.16b,v0.16b,v4.16b,#8 1224 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1225 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1226 ext v7.16b,v23.16b,v16.16b,#8 1227 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1228 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1229 add v1.2d,v0.2d,v2.2d // "D + T1" 1230 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1231 add v24.2d,v24.2d,v20.2d 1232 ld1 {v25.2d},[x3],#16 1233 ext v24.16b,v24.16b,v24.16b,#8 1234 ext v5.16b,v1.16b,v4.16b,#8 1235 ext v6.16b,v3.16b,v1.16b,#8 1236 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1237 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1238 ext v7.16b,v16.16b,v17.16b,#8 1239 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1240 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1241 add v0.2d,v3.2d,v4.2d // "D + T1" 1242 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1243 add v25.2d,v25.2d,v21.2d 1244 ld1 {v24.2d},[x3],#16 1245 ext v25.16b,v25.16b,v25.16b,#8 1246 ext v5.16b,v0.16b,v1.16b,#8 1247 ext v6.16b,v2.16b,v0.16b,#8 1248 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1249 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1250 ext v7.16b,v17.16b,v18.16b,#8 1251 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1252 .inst 0xce678a95 //sha512su1 
v21.16b,v20.16b,v7.16b 1253 add v3.2d,v2.2d,v1.2d // "D + T1" 1254 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1255 add v24.2d,v24.2d,v22.2d 1256 ld1 {v25.2d},[x3],#16 1257 ext v24.16b,v24.16b,v24.16b,#8 1258 ext v5.16b,v3.16b,v0.16b,#8 1259 ext v6.16b,v4.16b,v3.16b,#8 1260 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1261 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1262 ext v7.16b,v18.16b,v19.16b,#8 1263 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1264 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1265 add v2.2d,v4.2d,v0.2d // "D + T1" 1266 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1267 add v25.2d,v25.2d,v23.2d 1268 ld1 {v24.2d},[x3],#16 1269 ext v25.16b,v25.16b,v25.16b,#8 1270 ext v5.16b,v2.16b,v3.16b,#8 1271 ext v6.16b,v1.16b,v2.16b,#8 1272 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1273 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1274 ext v7.16b,v19.16b,v20.16b,#8 1275 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1276 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1277 add v4.2d,v1.2d,v3.2d // "D + T1" 1278 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1279 add v24.2d,v24.2d,v16.2d 1280 ld1 {v25.2d},[x3],#16 1281 ext v24.16b,v24.16b,v24.16b,#8 1282 ext v5.16b,v4.16b,v2.16b,#8 1283 ext v6.16b,v0.16b,v4.16b,#8 1284 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1285 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1286 ext v7.16b,v20.16b,v21.16b,#8 1287 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1288 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1289 add v1.2d,v0.2d,v2.2d // "D + T1" 1290 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1291 add v25.2d,v25.2d,v17.2d 1292 ld1 {v24.2d},[x3],#16 1293 ext v25.16b,v25.16b,v25.16b,#8 1294 ext v5.16b,v1.16b,v4.16b,#8 1295 ext v6.16b,v3.16b,v1.16b,#8 1296 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1297 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1298 ext v7.16b,v21.16b,v22.16b,#8 1299 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1300 .inst 0xce678a11 //sha512su1 
v17.16b,v16.16b,v7.16b 1301 add v0.2d,v3.2d,v4.2d // "D + T1" 1302 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1303 add v24.2d,v24.2d,v18.2d 1304 ld1 {v25.2d},[x3],#16 1305 ext v24.16b,v24.16b,v24.16b,#8 1306 ext v5.16b,v0.16b,v1.16b,#8 1307 ext v6.16b,v2.16b,v0.16b,#8 1308 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1309 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1310 ext v7.16b,v22.16b,v23.16b,#8 1311 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1312 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1313 add v3.2d,v2.2d,v1.2d // "D + T1" 1314 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1315 add v25.2d,v25.2d,v19.2d 1316 ld1 {v24.2d},[x3],#16 1317 ext v25.16b,v25.16b,v25.16b,#8 1318 ext v5.16b,v3.16b,v0.16b,#8 1319 ext v6.16b,v4.16b,v3.16b,#8 1320 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1321 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1322 ext v7.16b,v23.16b,v16.16b,#8 1323 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1324 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1325 add v2.2d,v4.2d,v0.2d // "D + T1" 1326 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1327 add v24.2d,v24.2d,v20.2d 1328 ld1 {v25.2d},[x3],#16 1329 ext v24.16b,v24.16b,v24.16b,#8 1330 ext v5.16b,v2.16b,v3.16b,#8 1331 ext v6.16b,v1.16b,v2.16b,#8 1332 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1333 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1334 ext v7.16b,v16.16b,v17.16b,#8 1335 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1336 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1337 add v4.2d,v1.2d,v3.2d // "D + T1" 1338 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1339 add v25.2d,v25.2d,v21.2d 1340 ld1 {v24.2d},[x3],#16 1341 ext v25.16b,v25.16b,v25.16b,#8 1342 ext v5.16b,v4.16b,v2.16b,#8 1343 ext v6.16b,v0.16b,v4.16b,#8 1344 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1345 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1346 ext v7.16b,v17.16b,v18.16b,#8 1347 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1348 .inst 0xce678a95 //sha512su1 
v21.16b,v20.16b,v7.16b 1349 add v1.2d,v0.2d,v2.2d // "D + T1" 1350 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1351 add v24.2d,v24.2d,v22.2d 1352 ld1 {v25.2d},[x3],#16 1353 ext v24.16b,v24.16b,v24.16b,#8 1354 ext v5.16b,v1.16b,v4.16b,#8 1355 ext v6.16b,v3.16b,v1.16b,#8 1356 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1357 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1358 ext v7.16b,v18.16b,v19.16b,#8 1359 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1360 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1361 add v0.2d,v3.2d,v4.2d // "D + T1" 1362 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1363 add v25.2d,v25.2d,v23.2d 1364 ld1 {v24.2d},[x3],#16 1365 ext v25.16b,v25.16b,v25.16b,#8 1366 ext v5.16b,v0.16b,v1.16b,#8 1367 ext v6.16b,v2.16b,v0.16b,#8 1368 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1369 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1370 ext v7.16b,v19.16b,v20.16b,#8 1371 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1372 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1373 add v3.2d,v2.2d,v1.2d // "D + T1" 1374 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1375 add v24.2d,v24.2d,v16.2d 1376 ld1 {v25.2d},[x3],#16 1377 ext v24.16b,v24.16b,v24.16b,#8 1378 ext v5.16b,v3.16b,v0.16b,#8 1379 ext v6.16b,v4.16b,v3.16b,#8 1380 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1381 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1382 ext v7.16b,v20.16b,v21.16b,#8 1383 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1384 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1385 add v2.2d,v4.2d,v0.2d // "D + T1" 1386 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1387 add v25.2d,v25.2d,v17.2d 1388 ld1 {v24.2d},[x3],#16 1389 ext v25.16b,v25.16b,v25.16b,#8 1390 ext v5.16b,v2.16b,v3.16b,#8 1391 ext v6.16b,v1.16b,v2.16b,#8 1392 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1393 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1394 ext v7.16b,v21.16b,v22.16b,#8 1395 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1396 .inst 0xce678a11 //sha512su1 
v17.16b,v16.16b,v7.16b 1397 add v4.2d,v1.2d,v3.2d // "D + T1" 1398 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1399 add v24.2d,v24.2d,v18.2d 1400 ld1 {v25.2d},[x3],#16 1401 ext v24.16b,v24.16b,v24.16b,#8 1402 ext v5.16b,v4.16b,v2.16b,#8 1403 ext v6.16b,v0.16b,v4.16b,#8 1404 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1405 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1406 ext v7.16b,v22.16b,v23.16b,#8 1407 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1408 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1409 add v1.2d,v0.2d,v2.2d // "D + T1" 1410 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1411 add v25.2d,v25.2d,v19.2d 1412 ld1 {v24.2d},[x3],#16 1413 ext v25.16b,v25.16b,v25.16b,#8 1414 ext v5.16b,v1.16b,v4.16b,#8 1415 ext v6.16b,v3.16b,v1.16b,#8 1416 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1417 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1418 ext v7.16b,v23.16b,v16.16b,#8 1419 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1420 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1421 add v0.2d,v3.2d,v4.2d // "D + T1" 1422 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1423 add v24.2d,v24.2d,v20.2d 1424 ld1 {v25.2d},[x3],#16 1425 ext v24.16b,v24.16b,v24.16b,#8 1426 ext v5.16b,v0.16b,v1.16b,#8 1427 ext v6.16b,v2.16b,v0.16b,#8 1428 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1429 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1430 ext v7.16b,v16.16b,v17.16b,#8 1431 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1432 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1433 add v3.2d,v2.2d,v1.2d // "D + T1" 1434 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1435 add v25.2d,v25.2d,v21.2d 1436 ld1 {v24.2d},[x3],#16 1437 ext v25.16b,v25.16b,v25.16b,#8 1438 ext v5.16b,v3.16b,v0.16b,#8 1439 ext v6.16b,v4.16b,v3.16b,#8 1440 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1441 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1442 ext v7.16b,v17.16b,v18.16b,#8 1443 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1444 .inst 0xce678a95 //sha512su1 
v21.16b,v20.16b,v7.16b 1445 add v2.2d,v4.2d,v0.2d // "D + T1" 1446 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1447 add v24.2d,v24.2d,v22.2d 1448 ld1 {v25.2d},[x3],#16 1449 ext v24.16b,v24.16b,v24.16b,#8 1450 ext v5.16b,v2.16b,v3.16b,#8 1451 ext v6.16b,v1.16b,v2.16b,#8 1452 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1453 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1454 ext v7.16b,v18.16b,v19.16b,#8 1455 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1456 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1457 add v4.2d,v1.2d,v3.2d // "D + T1" 1458 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1459 add v25.2d,v25.2d,v23.2d 1460 ld1 {v24.2d},[x3],#16 1461 ext v25.16b,v25.16b,v25.16b,#8 1462 ext v5.16b,v4.16b,v2.16b,#8 1463 ext v6.16b,v0.16b,v4.16b,#8 1464 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1465 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1466 ext v7.16b,v19.16b,v20.16b,#8 1467 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1468 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1469 add v1.2d,v0.2d,v2.2d // "D + T1" 1470 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1471 ld1 {v25.2d},[x3],#16 1472 add v24.2d,v24.2d,v16.2d 1473 ld1 {v16.16b},[x1],#16 // load next input 1474 ext v24.16b,v24.16b,v24.16b,#8 1475 ext v5.16b,v1.16b,v4.16b,#8 1476 ext v6.16b,v3.16b,v1.16b,#8 1477 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1478 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1479 rev64 v16.16b,v16.16b 1480 add v0.2d,v3.2d,v4.2d // "D + T1" 1481 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1482 ld1 {v24.2d},[x3],#16 1483 add v25.2d,v25.2d,v17.2d 1484 ld1 {v17.16b},[x1],#16 // load next input 1485 ext v25.16b,v25.16b,v25.16b,#8 1486 ext v5.16b,v0.16b,v1.16b,#8 1487 ext v6.16b,v2.16b,v0.16b,#8 1488 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1489 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1490 rev64 v17.16b,v17.16b 1491 add v3.2d,v2.2d,v1.2d // "D + T1" 1492 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1493 ld1 {v25.2d},[x3],#16 1494 
add v24.2d,v24.2d,v18.2d 1495 ld1 {v18.16b},[x1],#16 // load next input 1496 ext v24.16b,v24.16b,v24.16b,#8 1497 ext v5.16b,v3.16b,v0.16b,#8 1498 ext v6.16b,v4.16b,v3.16b,#8 1499 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1500 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1501 rev64 v18.16b,v18.16b 1502 add v2.2d,v4.2d,v0.2d // "D + T1" 1503 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1504 ld1 {v24.2d},[x3],#16 1505 add v25.2d,v25.2d,v19.2d 1506 ld1 {v19.16b},[x1],#16 // load next input 1507 ext v25.16b,v25.16b,v25.16b,#8 1508 ext v5.16b,v2.16b,v3.16b,#8 1509 ext v6.16b,v1.16b,v2.16b,#8 1510 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1511 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1512 rev64 v19.16b,v19.16b 1513 add v4.2d,v1.2d,v3.2d // "D + T1" 1514 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1515 ld1 {v25.2d},[x3],#16 1516 add v24.2d,v24.2d,v20.2d 1517 ld1 {v20.16b},[x1],#16 // load next input 1518 ext v24.16b,v24.16b,v24.16b,#8 1519 ext v5.16b,v4.16b,v2.16b,#8 1520 ext v6.16b,v0.16b,v4.16b,#8 1521 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1522 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1523 rev64 v20.16b,v20.16b 1524 add v1.2d,v0.2d,v2.2d // "D + T1" 1525 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1526 ld1 {v24.2d},[x3],#16 1527 add v25.2d,v25.2d,v21.2d 1528 ld1 {v21.16b},[x1],#16 // load next input 1529 ext v25.16b,v25.16b,v25.16b,#8 1530 ext v5.16b,v1.16b,v4.16b,#8 1531 ext v6.16b,v3.16b,v1.16b,#8 1532 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1533 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1534 rev64 v21.16b,v21.16b 1535 add v0.2d,v3.2d,v4.2d // "D + T1" 1536 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1537 ld1 {v25.2d},[x3],#16 1538 add v24.2d,v24.2d,v22.2d 1539 ld1 {v22.16b},[x1],#16 // load next input 1540 ext v24.16b,v24.16b,v24.16b,#8 1541 ext v5.16b,v0.16b,v1.16b,#8 1542 ext v6.16b,v2.16b,v0.16b,#8 1543 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1544 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1545 
rev64 v22.16b,v22.16b 1546 add v3.2d,v2.2d,v1.2d // "D + T1" 1547 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1548 sub x3,x3,#80*8 // rewind 1549 add v25.2d,v25.2d,v23.2d 1550 ld1 {v23.16b},[x1],#16 // load next input 1551 ext v25.16b,v25.16b,v25.16b,#8 1552 ext v5.16b,v3.16b,v0.16b,#8 1553 ext v6.16b,v4.16b,v3.16b,#8 1554 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1555 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1556 rev64 v23.16b,v23.16b 1557 add v2.2d,v4.2d,v0.2d // "D + T1" 1558 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1559 add v0.2d,v0.2d,v26.2d // accumulate 1560 add v1.2d,v1.2d,v27.2d 1561 add v2.2d,v2.2d,v28.2d 1562 add v3.2d,v3.2d,v29.2d 1563 1564 cbnz x2,.Loop_hw 1565 1566 st1 {v0.2d-v3.2d},[x0] // store context 1567 1568 ldr x29,[sp],#16 1569 ret 1570.size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8 1571#endif 1572