/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
 */

#if defined(__aarch64__)

	/*
	 * GNU property note.  NOTE(review): 3221225472 (0xc0000000) looks
	 * like GNU_PROPERTY_AARCH64_FEATURE_1_AND with value 3 (BTI | PAC);
	 * confirm against the AArch64 ELF ABI.
	 */
	.section	.note.gnu.property,"a",@note
	.p2align	3
	.word	4
	.word	16
	.word	5
	.asciz	"GNU"
	.word	3221225472
	.word	4
	.word	3
	.word	0
.text

/*
 * SHA-512 round constants K[0..79], followed by a zero quad.  The scalar
 * code below prefetches one constant ahead and leaves its round loop when
 * the prefetched value is that zero terminator.
 */
.align	6
.type	.LK512,%object
.LK512:
	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
	.quad	0x3956c25bf348b538,0x59f111f1b605d019
	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
	.quad	0xd807aa98a3030242,0x12835b0145706fbe
	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
	.quad	0x06ca6351e003826f,0x142929670a0e6e70
	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
	.quad	0x81c2c92e47edaee6,0x92722c851482353b
	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
	.quad	0xd192e819d6ef5218,0xd69906245565a910
	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
	.quad	0x90befffa23631e28,0xa4506cebde82bde9
	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
	.quad	0xca273eceea26619c,0xd186b8c721c0c207
	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
	.quad	0x113f9804bef90dae,0x1b710b35131c471b
	.quad	0x28db77f523047d84,0x32caab7b40c72493
	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
	.quad	0	// terminator
.size	.LK512,.-.LK512

/*
 * zfs_sha512_block_armv7 - SHA-512 compression using only general-purpose
 * AArch64 instructions (no NEON, no SHA-512 crypto extension).
 *
 * In:
 *	x0 = pointer to the eight 64-bit chaining words (read and updated)
 *	x1 = pointer to the input data
 *	x2 = number of 128-byte blocks; 0 returns at once without touching
 *	     the state
 * Out:
 *	the eight words at [x0] are updated in place; no return value
 *
 * Register use inside the function:
 *	x20-x27	 working variables a..h; their roles rotate every round
 *	x3-x15, x0, x1, x2
 *		 the 16-entry message schedule window X[]; four entries at
 *		 a time are spilled to the scratch area at [sp,#0..#24]
 *		 because their registers double as temporaries
 *	x16, x17 Sigma1(e) / Ch(e,f,g), then Sigma0(a)
 *	x19, x28 alternately hold the prefetched K[i] and the a^b value
 *		 reused for Maj(a,b,c) in the following round
 *	x30	 walks .LK512 (the return address lives in the frame)
 *
 * Frame (128 bytes, x29 = frame base) plus 32 bytes of scratch below it:
 *	[x29,#0]   x29,x30	[x29,#16..#95] x19-x28
 *	[x29,#96]  x0 (state)	[x29,#104] end-of-input pointer
 *	[x29,#112] input pointer saved after the first 16 bytes were read
 */
.globl	zfs_sha512_block_armv7
.type	zfs_sha512_block_armv7,%function
.align	6
zfs_sha512_block_armv7:
	hint	#34	// bti c
	// The block loop below is bottom-tested, so a count of zero would
	// run past the caller's buffer; leave before any frame is built.
	cbz	x2,.Lv7_ret
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]

	// One iteration of .Loop consumes one 128-byte block.
	// Rounds 0..15: X[i] comes straight from the input, byte-swapped on
	// little-endian builds.
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]
#ifndef	__AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8

	// Rounds 16..79, sixteen per pass.  Each round also extends the
	// schedule: X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9].
	// The pass ends with a K prefetch; a zero value is the table
	// terminator and ends the loop.
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx

	// End of block: x0/x1/x2 were reused for the schedule, so reload
	// them from the frame, then fold a..h into the chaining words.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind: 80 constants + terminator = 81*8

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
.Lv7_ret:
	ret
.size	zfs_sha512_block_armv7,.-zfs_sha512_block_armv7


.globl	zfs_sha512_block_armv8
.type	zfs_sha512_block_armv8,%function
.align	6
zfs_sha512_block_armv8:
	hint	#34	// bti c
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b-v19.16b},[x1],#64	// load input
	ld1	{v20.16b-v23.16b},[x1],#64

	ld1	{v0.2d-v3.2d},[x0]	// load context
	adr	x3,.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b	// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	csel	x1,x1,x4,ne	// conditional rewind
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst
0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1108 add v1.2d,v0.2d,v2.2d // "D + T1" 1109 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1110 add v24.2d,v24.2d,v18.2d 1111 ld1 {v25.2d},[x3],#16 1112 ext v24.16b,v24.16b,v24.16b,#8 1113 ext v5.16b,v1.16b,v4.16b,#8 1114 ext v6.16b,v3.16b,v1.16b,#8 1115 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1116 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1117 ext v7.16b,v22.16b,v23.16b,#8 1118 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1119 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1120 add v0.2d,v3.2d,v4.2d // "D + T1" 1121 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1122 add v25.2d,v25.2d,v19.2d 1123 ld1 {v24.2d},[x3],#16 1124 ext v25.16b,v25.16b,v25.16b,#8 1125 ext v5.16b,v0.16b,v1.16b,#8 1126 ext v6.16b,v2.16b,v0.16b,#8 1127 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1128 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1129 ext v7.16b,v23.16b,v16.16b,#8 1130 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1131 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1132 add v3.2d,v2.2d,v1.2d // "D + T1" 1133 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1134 add v24.2d,v24.2d,v20.2d 1135 ld1 {v25.2d},[x3],#16 1136 ext v24.16b,v24.16b,v24.16b,#8 1137 ext v5.16b,v3.16b,v0.16b,#8 1138 ext v6.16b,v4.16b,v3.16b,#8 1139 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1140 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1141 ext v7.16b,v16.16b,v17.16b,#8 1142 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1143 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1144 add v2.2d,v4.2d,v0.2d // "D + T1" 1145 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1146 add v25.2d,v25.2d,v21.2d 1147 ld1 {v24.2d},[x3],#16 1148 ext v25.16b,v25.16b,v25.16b,#8 1149 ext v5.16b,v2.16b,v3.16b,#8 1150 ext v6.16b,v1.16b,v2.16b,#8 1151 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1152 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1153 ext v7.16b,v17.16b,v18.16b,#8 1154 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1155 .inst 0xce678a95 
//sha512su1 v21.16b,v20.16b,v7.16b 1156 add v4.2d,v1.2d,v3.2d // "D + T1" 1157 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1158 add v24.2d,v24.2d,v22.2d 1159 ld1 {v25.2d},[x3],#16 1160 ext v24.16b,v24.16b,v24.16b,#8 1161 ext v5.16b,v4.16b,v2.16b,#8 1162 ext v6.16b,v0.16b,v4.16b,#8 1163 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1164 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1165 ext v7.16b,v18.16b,v19.16b,#8 1166 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1167 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1168 add v1.2d,v0.2d,v2.2d // "D + T1" 1169 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1170 add v25.2d,v25.2d,v23.2d 1171 ld1 {v24.2d},[x3],#16 1172 ext v25.16b,v25.16b,v25.16b,#8 1173 ext v5.16b,v1.16b,v4.16b,#8 1174 ext v6.16b,v3.16b,v1.16b,#8 1175 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1176 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1177 ext v7.16b,v19.16b,v20.16b,#8 1178 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1179 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1180 add v0.2d,v3.2d,v4.2d // "D + T1" 1181 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1182 add v24.2d,v24.2d,v16.2d 1183 ld1 {v25.2d},[x3],#16 1184 ext v24.16b,v24.16b,v24.16b,#8 1185 ext v5.16b,v0.16b,v1.16b,#8 1186 ext v6.16b,v2.16b,v0.16b,#8 1187 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1188 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1189 ext v7.16b,v20.16b,v21.16b,#8 1190 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1191 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1192 add v3.2d,v2.2d,v1.2d // "D + T1" 1193 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1194 add v25.2d,v25.2d,v17.2d 1195 ld1 {v24.2d},[x3],#16 1196 ext v25.16b,v25.16b,v25.16b,#8 1197 ext v5.16b,v3.16b,v0.16b,#8 1198 ext v6.16b,v4.16b,v3.16b,#8 1199 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1200 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1201 ext v7.16b,v21.16b,v22.16b,#8 1202 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1203 .inst 0xce678a11 //sha512su1 
v17.16b,v16.16b,v7.16b 1204 add v2.2d,v4.2d,v0.2d // "D + T1" 1205 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1206 add v24.2d,v24.2d,v18.2d 1207 ld1 {v25.2d},[x3],#16 1208 ext v24.16b,v24.16b,v24.16b,#8 1209 ext v5.16b,v2.16b,v3.16b,#8 1210 ext v6.16b,v1.16b,v2.16b,#8 1211 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1212 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1213 ext v7.16b,v22.16b,v23.16b,#8 1214 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1215 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1216 add v4.2d,v1.2d,v3.2d // "D + T1" 1217 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1218 add v25.2d,v25.2d,v19.2d 1219 ld1 {v24.2d},[x3],#16 1220 ext v25.16b,v25.16b,v25.16b,#8 1221 ext v5.16b,v4.16b,v2.16b,#8 1222 ext v6.16b,v0.16b,v4.16b,#8 1223 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1224 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1225 ext v7.16b,v23.16b,v16.16b,#8 1226 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1227 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1228 add v1.2d,v0.2d,v2.2d // "D + T1" 1229 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1230 add v24.2d,v24.2d,v20.2d 1231 ld1 {v25.2d},[x3],#16 1232 ext v24.16b,v24.16b,v24.16b,#8 1233 ext v5.16b,v1.16b,v4.16b,#8 1234 ext v6.16b,v3.16b,v1.16b,#8 1235 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1236 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1237 ext v7.16b,v16.16b,v17.16b,#8 1238 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1239 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1240 add v0.2d,v3.2d,v4.2d // "D + T1" 1241 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1242 add v25.2d,v25.2d,v21.2d 1243 ld1 {v24.2d},[x3],#16 1244 ext v25.16b,v25.16b,v25.16b,#8 1245 ext v5.16b,v0.16b,v1.16b,#8 1246 ext v6.16b,v2.16b,v0.16b,#8 1247 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1248 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1249 ext v7.16b,v17.16b,v18.16b,#8 1250 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1251 .inst 0xce678a95 //sha512su1 
v21.16b,v20.16b,v7.16b 1252 add v3.2d,v2.2d,v1.2d // "D + T1" 1253 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1254 add v24.2d,v24.2d,v22.2d 1255 ld1 {v25.2d},[x3],#16 1256 ext v24.16b,v24.16b,v24.16b,#8 1257 ext v5.16b,v3.16b,v0.16b,#8 1258 ext v6.16b,v4.16b,v3.16b,#8 1259 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1260 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1261 ext v7.16b,v18.16b,v19.16b,#8 1262 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1263 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1264 add v2.2d,v4.2d,v0.2d // "D + T1" 1265 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1266 add v25.2d,v25.2d,v23.2d 1267 ld1 {v24.2d},[x3],#16 1268 ext v25.16b,v25.16b,v25.16b,#8 1269 ext v5.16b,v2.16b,v3.16b,#8 1270 ext v6.16b,v1.16b,v2.16b,#8 1271 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1272 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1273 ext v7.16b,v19.16b,v20.16b,#8 1274 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1275 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1276 add v4.2d,v1.2d,v3.2d // "D + T1" 1277 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1278 add v24.2d,v24.2d,v16.2d 1279 ld1 {v25.2d},[x3],#16 1280 ext v24.16b,v24.16b,v24.16b,#8 1281 ext v5.16b,v4.16b,v2.16b,#8 1282 ext v6.16b,v0.16b,v4.16b,#8 1283 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1284 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1285 ext v7.16b,v20.16b,v21.16b,#8 1286 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1287 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1288 add v1.2d,v0.2d,v2.2d // "D + T1" 1289 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1290 add v25.2d,v25.2d,v17.2d 1291 ld1 {v24.2d},[x3],#16 1292 ext v25.16b,v25.16b,v25.16b,#8 1293 ext v5.16b,v1.16b,v4.16b,#8 1294 ext v6.16b,v3.16b,v1.16b,#8 1295 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1296 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1297 ext v7.16b,v21.16b,v22.16b,#8 1298 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1299 .inst 0xce678a11 //sha512su1 
v17.16b,v16.16b,v7.16b 1300 add v0.2d,v3.2d,v4.2d // "D + T1" 1301 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1302 add v24.2d,v24.2d,v18.2d 1303 ld1 {v25.2d},[x3],#16 1304 ext v24.16b,v24.16b,v24.16b,#8 1305 ext v5.16b,v0.16b,v1.16b,#8 1306 ext v6.16b,v2.16b,v0.16b,#8 1307 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1308 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1309 ext v7.16b,v22.16b,v23.16b,#8 1310 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1311 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1312 add v3.2d,v2.2d,v1.2d // "D + T1" 1313 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1314 add v25.2d,v25.2d,v19.2d 1315 ld1 {v24.2d},[x3],#16 1316 ext v25.16b,v25.16b,v25.16b,#8 1317 ext v5.16b,v3.16b,v0.16b,#8 1318 ext v6.16b,v4.16b,v3.16b,#8 1319 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1320 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1321 ext v7.16b,v23.16b,v16.16b,#8 1322 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1323 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1324 add v2.2d,v4.2d,v0.2d // "D + T1" 1325 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1326 add v24.2d,v24.2d,v20.2d 1327 ld1 {v25.2d},[x3],#16 1328 ext v24.16b,v24.16b,v24.16b,#8 1329 ext v5.16b,v2.16b,v3.16b,#8 1330 ext v6.16b,v1.16b,v2.16b,#8 1331 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1332 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1333 ext v7.16b,v16.16b,v17.16b,#8 1334 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1335 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1336 add v4.2d,v1.2d,v3.2d // "D + T1" 1337 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1338 add v25.2d,v25.2d,v21.2d 1339 ld1 {v24.2d},[x3],#16 1340 ext v25.16b,v25.16b,v25.16b,#8 1341 ext v5.16b,v4.16b,v2.16b,#8 1342 ext v6.16b,v0.16b,v4.16b,#8 1343 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1344 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1345 ext v7.16b,v17.16b,v18.16b,#8 1346 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1347 .inst 0xce678a95 //sha512su1 
v21.16b,v20.16b,v7.16b 1348 add v1.2d,v0.2d,v2.2d // "D + T1" 1349 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1350 add v24.2d,v24.2d,v22.2d 1351 ld1 {v25.2d},[x3],#16 1352 ext v24.16b,v24.16b,v24.16b,#8 1353 ext v5.16b,v1.16b,v4.16b,#8 1354 ext v6.16b,v3.16b,v1.16b,#8 1355 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1356 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1357 ext v7.16b,v18.16b,v19.16b,#8 1358 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1359 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1360 add v0.2d,v3.2d,v4.2d // "D + T1" 1361 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1362 add v25.2d,v25.2d,v23.2d 1363 ld1 {v24.2d},[x3],#16 1364 ext v25.16b,v25.16b,v25.16b,#8 1365 ext v5.16b,v0.16b,v1.16b,#8 1366 ext v6.16b,v2.16b,v0.16b,#8 1367 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1368 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1369 ext v7.16b,v19.16b,v20.16b,#8 1370 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1371 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1372 add v3.2d,v2.2d,v1.2d // "D + T1" 1373 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1374 add v24.2d,v24.2d,v16.2d 1375 ld1 {v25.2d},[x3],#16 1376 ext v24.16b,v24.16b,v24.16b,#8 1377 ext v5.16b,v3.16b,v0.16b,#8 1378 ext v6.16b,v4.16b,v3.16b,#8 1379 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1380 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1381 ext v7.16b,v20.16b,v21.16b,#8 1382 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1383 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1384 add v2.2d,v4.2d,v0.2d // "D + T1" 1385 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1386 add v25.2d,v25.2d,v17.2d 1387 ld1 {v24.2d},[x3],#16 1388 ext v25.16b,v25.16b,v25.16b,#8 1389 ext v5.16b,v2.16b,v3.16b,#8 1390 ext v6.16b,v1.16b,v2.16b,#8 1391 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1392 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1393 ext v7.16b,v21.16b,v22.16b,#8 1394 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1395 .inst 0xce678a11 //sha512su1 
v17.16b,v16.16b,v7.16b 1396 add v4.2d,v1.2d,v3.2d // "D + T1" 1397 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1398 add v24.2d,v24.2d,v18.2d 1399 ld1 {v25.2d},[x3],#16 1400 ext v24.16b,v24.16b,v24.16b,#8 1401 ext v5.16b,v4.16b,v2.16b,#8 1402 ext v6.16b,v0.16b,v4.16b,#8 1403 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1404 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1405 ext v7.16b,v22.16b,v23.16b,#8 1406 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1407 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1408 add v1.2d,v0.2d,v2.2d // "D + T1" 1409 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1410 add v25.2d,v25.2d,v19.2d 1411 ld1 {v24.2d},[x3],#16 1412 ext v25.16b,v25.16b,v25.16b,#8 1413 ext v5.16b,v1.16b,v4.16b,#8 1414 ext v6.16b,v3.16b,v1.16b,#8 1415 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1416 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1417 ext v7.16b,v23.16b,v16.16b,#8 1418 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1419 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1420 add v0.2d,v3.2d,v4.2d // "D + T1" 1421 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1422 add v24.2d,v24.2d,v20.2d 1423 ld1 {v25.2d},[x3],#16 1424 ext v24.16b,v24.16b,v24.16b,#8 1425 ext v5.16b,v0.16b,v1.16b,#8 1426 ext v6.16b,v2.16b,v0.16b,#8 1427 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1428 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1429 ext v7.16b,v16.16b,v17.16b,#8 1430 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1431 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1432 add v3.2d,v2.2d,v1.2d // "D + T1" 1433 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1434 add v25.2d,v25.2d,v21.2d 1435 ld1 {v24.2d},[x3],#16 1436 ext v25.16b,v25.16b,v25.16b,#8 1437 ext v5.16b,v3.16b,v0.16b,#8 1438 ext v6.16b,v4.16b,v3.16b,#8 1439 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1440 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1441 ext v7.16b,v17.16b,v18.16b,#8 1442 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1443 .inst 0xce678a95 //sha512su1 
v21.16b,v20.16b,v7.16b 1444 add v2.2d,v4.2d,v0.2d // "D + T1" 1445 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1446 add v24.2d,v24.2d,v22.2d 1447 ld1 {v25.2d},[x3],#16 1448 ext v24.16b,v24.16b,v24.16b,#8 1449 ext v5.16b,v2.16b,v3.16b,#8 1450 ext v6.16b,v1.16b,v2.16b,#8 1451 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1452 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1453 ext v7.16b,v18.16b,v19.16b,#8 1454 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1455 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1456 add v4.2d,v1.2d,v3.2d // "D + T1" 1457 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1458 add v25.2d,v25.2d,v23.2d 1459 ld1 {v24.2d},[x3],#16 1460 ext v25.16b,v25.16b,v25.16b,#8 1461 ext v5.16b,v4.16b,v2.16b,#8 1462 ext v6.16b,v0.16b,v4.16b,#8 1463 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1464 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1465 ext v7.16b,v19.16b,v20.16b,#8 1466 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1467 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1468 add v1.2d,v0.2d,v2.2d // "D + T1" 1469 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1470 ld1 {v25.2d},[x3],#16 1471 add v24.2d,v24.2d,v16.2d 1472 ld1 {v16.16b},[x1],#16 // load next input 1473 ext v24.16b,v24.16b,v24.16b,#8 1474 ext v5.16b,v1.16b,v4.16b,#8 1475 ext v6.16b,v3.16b,v1.16b,#8 1476 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1477 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1478 rev64 v16.16b,v16.16b 1479 add v0.2d,v3.2d,v4.2d // "D + T1" 1480 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1481 ld1 {v24.2d},[x3],#16 1482 add v25.2d,v25.2d,v17.2d 1483 ld1 {v17.16b},[x1],#16 // load next input 1484 ext v25.16b,v25.16b,v25.16b,#8 1485 ext v5.16b,v0.16b,v1.16b,#8 1486 ext v6.16b,v2.16b,v0.16b,#8 1487 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1488 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1489 rev64 v17.16b,v17.16b 1490 add v3.2d,v2.2d,v1.2d // "D + T1" 1491 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1492 ld1 {v25.2d},[x3],#16 1493 
add v24.2d,v24.2d,v18.2d 1494 ld1 {v18.16b},[x1],#16 // load next input 1495 ext v24.16b,v24.16b,v24.16b,#8 1496 ext v5.16b,v3.16b,v0.16b,#8 1497 ext v6.16b,v4.16b,v3.16b,#8 1498 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1499 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1500 rev64 v18.16b,v18.16b 1501 add v2.2d,v4.2d,v0.2d // "D + T1" 1502 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1503 ld1 {v24.2d},[x3],#16 1504 add v25.2d,v25.2d,v19.2d 1505 ld1 {v19.16b},[x1],#16 // load next input 1506 ext v25.16b,v25.16b,v25.16b,#8 1507 ext v5.16b,v2.16b,v3.16b,#8 1508 ext v6.16b,v1.16b,v2.16b,#8 1509 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1510 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1511 rev64 v19.16b,v19.16b 1512 add v4.2d,v1.2d,v3.2d // "D + T1" 1513 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1514 ld1 {v25.2d},[x3],#16 1515 add v24.2d,v24.2d,v20.2d 1516 ld1 {v20.16b},[x1],#16 // load next input 1517 ext v24.16b,v24.16b,v24.16b,#8 1518 ext v5.16b,v4.16b,v2.16b,#8 1519 ext v6.16b,v0.16b,v4.16b,#8 1520 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1521 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1522 rev64 v20.16b,v20.16b 1523 add v1.2d,v0.2d,v2.2d // "D + T1" 1524 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1525 ld1 {v24.2d},[x3],#16 1526 add v25.2d,v25.2d,v21.2d 1527 ld1 {v21.16b},[x1],#16 // load next input 1528 ext v25.16b,v25.16b,v25.16b,#8 1529 ext v5.16b,v1.16b,v4.16b,#8 1530 ext v6.16b,v3.16b,v1.16b,#8 1531 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1532 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1533 rev64 v21.16b,v21.16b 1534 add v0.2d,v3.2d,v4.2d // "D + T1" 1535 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1536 ld1 {v25.2d},[x3],#16 1537 add v24.2d,v24.2d,v22.2d 1538 ld1 {v22.16b},[x1],#16 // load next input 1539 ext v24.16b,v24.16b,v24.16b,#8 1540 ext v5.16b,v0.16b,v1.16b,#8 1541 ext v6.16b,v2.16b,v0.16b,#8 1542 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1543 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1544 
rev64 v22.16b,v22.16b 1545 add v3.2d,v2.2d,v1.2d // "D + T1" 1546 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1547 sub x3,x3,#80*8 // rewind 1548 add v25.2d,v25.2d,v23.2d 1549 ld1 {v23.16b},[x1],#16 // load next input 1550 ext v25.16b,v25.16b,v25.16b,#8 1551 ext v5.16b,v3.16b,v0.16b,#8 1552 ext v6.16b,v4.16b,v3.16b,#8 1553 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1554 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1555 rev64 v23.16b,v23.16b 1556 add v2.2d,v4.2d,v0.2d // "D + T1" 1557 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1558 add v0.2d,v0.2d,v26.2d // accumulate 1559 add v1.2d,v1.2d,v27.2d 1560 add v2.2d,v2.2d,v28.2d 1561 add v3.2d,v3.2d,v29.2d 1562 1563 cbnz x2,.Loop_hw 1564 1565 st1 {v0.2d-v3.2d},[x0] // store context 1566 1567 ldr x29,[sp],#16 1568 ret 1569.size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8 1570#endif 1571