/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
 */

#if defined(__aarch64__)

.text

/*
 * .LK512 - the 80 SHA-512 round constants followed by one zero quadword.
 *
 * The scalar code below walks this table with a post-incremented pointer
 * and uses the zero quadword as its end-of-rounds marker, so the table
 * must keep exactly this layout (80 constants + terminator = 81 * 8 bytes).
 * It is addressed with "adr", hence it lives in the same section as the code.
 */
.align	6
.type	.LK512,%object
.LK512:
	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
	.quad	0x3956c25bf348b538,0x59f111f1b605d019
	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
	.quad	0xd807aa98a3030242,0x12835b0145706fbe
	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
	.quad	0x06ca6351e003826f,0x142929670a0e6e70
	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
	.quad	0x81c2c92e47edaee6,0x92722c851482353b
	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
	.quad	0xd192e819d6ef5218,0xd69906245565a910
	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
	.quad	0x90befffa23631e28,0xa4506cebde82bde9
	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
	.quad	0xca273eceea26619c,0xd186b8c721c0c207
	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
	.quad	0x113f9804bef90dae,0x1b710b35131c471b
	.quad	0x28db77f523047d84,0x32caab7b40c72493
	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
	.quad	0	// terminator
.size	.LK512,.-.LK512

/*
 * zfs_sha512_block_armv7 - SHA-512 compression using only the scalar
 * integer registers.
 *
 * NOTE(review): the C prototype is declared outside this file; the
 * argument roles below are read off the code and should be confirmed
 * against the caller.
 *
 * In:
 *   x0 = pointer to the eight 64-bit chaining words (read at entry,
 *        written back after every block)
 *   x1 = pointer to the input data
 *   x2 = number of 128-byte blocks (turned into an end-of-input pointer)
 *
 * The block loop compares against the end pointer only after a block has
 * been processed, so a block count of zero is not handled here.
 *
 * Register roles inside the loop:
 *   x20-x27       working variables a..h (roles rotate by one each round)
 *   x19 / x28     alternate between K[i] and the a^b / Maj() temporary
 *   x30           pointer into .LK512 (the link register is saved in the
 *                 frame and reloaded before ret)
 *   x16, x17      scratch for Sigma1/Ch and Sigma0
 *   x3-x15,x0-x2  the sixteen message words X[0..15]; from round 11 on
 *                 four of them at a time are parked in [sp,#0..#24] so
 *                 their registers can serve as temporaries
 *
 * Stack layout (x29 = frame base, sp = x29 - 32):
 *   [x29,#0]      saved x29, x30
 *   [x29,#16..95] saved x19-x28
 *   [x29,#96]     state pointer (x0)
 *   [x29,#104]    end-of-input pointer
 *   [x29,#112]    input pointer, saved after the first 16 bytes of the
 *                 current block were consumed
 *   [sp,#0..#31]  spill slots for message words
 *
 * Input words are byte-swapped with rev unless __AARCH64EB__ is defined.
 */
.globl	zfs_sha512_block_armv7
.type	zfs_sha512_block_armv7,%function
.align	6
zfs_sha512_block_armv7:
	hint	#34	// bti c
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]

	// One iteration of .Loop consumes one 128-byte block: rounds 0..15
	// are unrolled here and read the input, rounds 16..79 run in
	// .Loop_16_xx below.
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]
#ifndef __AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8

	// Sixteen rounds per pass, each round also producing the next
	// message-schedule word.  The pass repeats until the K value loaded
	// at its end is the zero terminator of .LK512.
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9

	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10

	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11

	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12

	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13

	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14

	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15

	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0

	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1

	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2

	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3

	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4

	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5

	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6

	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7

	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx	// zero K word => all 80 rounds done

	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind (80 constants + terminator, 81*8)

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	ret
.size	zfs_sha512_block_armv7,.-zfs_sha512_block_armv7


.globl	zfs_sha512_block_armv8
.type	zfs_sha512_block_armv8,%function
.align	6
zfs_sha512_block_armv8:
	hint	#34	// bti c
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
1048 add x29,sp,#0 1049 1050 ld1 {v16.16b-v19.16b},[x1],#64 // load input 1051 ld1 {v20.16b-v23.16b},[x1],#64 1052 1053 ld1 {v0.2d-v3.2d},[x0] // load context 1054 adr x3,.LK512 1055 1056 rev64 v16.16b,v16.16b 1057 rev64 v17.16b,v17.16b 1058 rev64 v18.16b,v18.16b 1059 rev64 v19.16b,v19.16b 1060 rev64 v20.16b,v20.16b 1061 rev64 v21.16b,v21.16b 1062 rev64 v22.16b,v22.16b 1063 rev64 v23.16b,v23.16b 1064 b .Loop_hw 1065 1066.align 4 1067.Loop_hw: 1068 ld1 {v24.2d},[x3],#16 1069 subs x2,x2,#1 1070 sub x4,x1,#128 1071 orr v26.16b,v0.16b,v0.16b // offload 1072 orr v27.16b,v1.16b,v1.16b 1073 orr v28.16b,v2.16b,v2.16b 1074 orr v29.16b,v3.16b,v3.16b 1075 csel x1,x1,x4,ne // conditional rewind 1076 add v24.2d,v24.2d,v16.2d 1077 ld1 {v25.2d},[x3],#16 1078 ext v24.16b,v24.16b,v24.16b,#8 1079 ext v5.16b,v2.16b,v3.16b,#8 1080 ext v6.16b,v1.16b,v2.16b,#8 1081 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1082 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1083 ext v7.16b,v20.16b,v21.16b,#8 1084 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1085 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1086 add v4.2d,v1.2d,v3.2d // "D + T1" 1087 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1088 add v25.2d,v25.2d,v17.2d 1089 ld1 {v24.2d},[x3],#16 1090 ext v25.16b,v25.16b,v25.16b,#8 1091 ext v5.16b,v4.16b,v2.16b,#8 1092 ext v6.16b,v0.16b,v4.16b,#8 1093 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1094 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1095 ext v7.16b,v21.16b,v22.16b,#8 1096 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1097 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1098 add v1.2d,v0.2d,v2.2d // "D + T1" 1099 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1100 add v24.2d,v24.2d,v18.2d 1101 ld1 {v25.2d},[x3],#16 1102 ext v24.16b,v24.16b,v24.16b,#8 1103 ext v5.16b,v1.16b,v4.16b,#8 1104 ext v6.16b,v3.16b,v1.16b,#8 1105 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1106 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1107 ext v7.16b,v22.16b,v23.16b,#8 1108 .inst 
0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1109 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1110 add v0.2d,v3.2d,v4.2d // "D + T1" 1111 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1112 add v25.2d,v25.2d,v19.2d 1113 ld1 {v24.2d},[x3],#16 1114 ext v25.16b,v25.16b,v25.16b,#8 1115 ext v5.16b,v0.16b,v1.16b,#8 1116 ext v6.16b,v2.16b,v0.16b,#8 1117 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1118 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1119 ext v7.16b,v23.16b,v16.16b,#8 1120 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1121 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1122 add v3.2d,v2.2d,v1.2d // "D + T1" 1123 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1124 add v24.2d,v24.2d,v20.2d 1125 ld1 {v25.2d},[x3],#16 1126 ext v24.16b,v24.16b,v24.16b,#8 1127 ext v5.16b,v3.16b,v0.16b,#8 1128 ext v6.16b,v4.16b,v3.16b,#8 1129 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1130 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1131 ext v7.16b,v16.16b,v17.16b,#8 1132 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1133 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1134 add v2.2d,v4.2d,v0.2d // "D + T1" 1135 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1136 add v25.2d,v25.2d,v21.2d 1137 ld1 {v24.2d},[x3],#16 1138 ext v25.16b,v25.16b,v25.16b,#8 1139 ext v5.16b,v2.16b,v3.16b,#8 1140 ext v6.16b,v1.16b,v2.16b,#8 1141 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1142 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1143 ext v7.16b,v17.16b,v18.16b,#8 1144 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1145 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1146 add v4.2d,v1.2d,v3.2d // "D + T1" 1147 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1148 add v24.2d,v24.2d,v22.2d 1149 ld1 {v25.2d},[x3],#16 1150 ext v24.16b,v24.16b,v24.16b,#8 1151 ext v5.16b,v4.16b,v2.16b,#8 1152 ext v6.16b,v0.16b,v4.16b,#8 1153 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1154 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1155 ext v7.16b,v18.16b,v19.16b,#8 1156 .inst 0xce6680a2 
//sha512h v2.16b,v5.16b,v6.16b 1157 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1158 add v1.2d,v0.2d,v2.2d // "D + T1" 1159 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1160 add v25.2d,v25.2d,v23.2d 1161 ld1 {v24.2d},[x3],#16 1162 ext v25.16b,v25.16b,v25.16b,#8 1163 ext v5.16b,v1.16b,v4.16b,#8 1164 ext v6.16b,v3.16b,v1.16b,#8 1165 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1166 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1167 ext v7.16b,v19.16b,v20.16b,#8 1168 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1169 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1170 add v0.2d,v3.2d,v4.2d // "D + T1" 1171 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1172 add v24.2d,v24.2d,v16.2d 1173 ld1 {v25.2d},[x3],#16 1174 ext v24.16b,v24.16b,v24.16b,#8 1175 ext v5.16b,v0.16b,v1.16b,#8 1176 ext v6.16b,v2.16b,v0.16b,#8 1177 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1178 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1179 ext v7.16b,v20.16b,v21.16b,#8 1180 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1181 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1182 add v3.2d,v2.2d,v1.2d // "D + T1" 1183 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1184 add v25.2d,v25.2d,v17.2d 1185 ld1 {v24.2d},[x3],#16 1186 ext v25.16b,v25.16b,v25.16b,#8 1187 ext v5.16b,v3.16b,v0.16b,#8 1188 ext v6.16b,v4.16b,v3.16b,#8 1189 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1190 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1191 ext v7.16b,v21.16b,v22.16b,#8 1192 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1193 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1194 add v2.2d,v4.2d,v0.2d // "D + T1" 1195 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1196 add v24.2d,v24.2d,v18.2d 1197 ld1 {v25.2d},[x3],#16 1198 ext v24.16b,v24.16b,v24.16b,#8 1199 ext v5.16b,v2.16b,v3.16b,#8 1200 ext v6.16b,v1.16b,v2.16b,#8 1201 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1202 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1203 ext v7.16b,v22.16b,v23.16b,#8 1204 .inst 0xce6680a3 //sha512h 
v3.16b,v5.16b,v6.16b 1205 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1206 add v4.2d,v1.2d,v3.2d // "D + T1" 1207 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1208 add v25.2d,v25.2d,v19.2d 1209 ld1 {v24.2d},[x3],#16 1210 ext v25.16b,v25.16b,v25.16b,#8 1211 ext v5.16b,v4.16b,v2.16b,#8 1212 ext v6.16b,v0.16b,v4.16b,#8 1213 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1214 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1215 ext v7.16b,v23.16b,v16.16b,#8 1216 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1217 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1218 add v1.2d,v0.2d,v2.2d // "D + T1" 1219 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1220 add v24.2d,v24.2d,v20.2d 1221 ld1 {v25.2d},[x3],#16 1222 ext v24.16b,v24.16b,v24.16b,#8 1223 ext v5.16b,v1.16b,v4.16b,#8 1224 ext v6.16b,v3.16b,v1.16b,#8 1225 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1226 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1227 ext v7.16b,v16.16b,v17.16b,#8 1228 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1229 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1230 add v0.2d,v3.2d,v4.2d // "D + T1" 1231 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1232 add v25.2d,v25.2d,v21.2d 1233 ld1 {v24.2d},[x3],#16 1234 ext v25.16b,v25.16b,v25.16b,#8 1235 ext v5.16b,v0.16b,v1.16b,#8 1236 ext v6.16b,v2.16b,v0.16b,#8 1237 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1238 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1239 ext v7.16b,v17.16b,v18.16b,#8 1240 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1241 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1242 add v3.2d,v2.2d,v1.2d // "D + T1" 1243 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1244 add v24.2d,v24.2d,v22.2d 1245 ld1 {v25.2d},[x3],#16 1246 ext v24.16b,v24.16b,v24.16b,#8 1247 ext v5.16b,v3.16b,v0.16b,#8 1248 ext v6.16b,v4.16b,v3.16b,#8 1249 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1250 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1251 ext v7.16b,v18.16b,v19.16b,#8 1252 .inst 0xce6680a0 //sha512h 
v0.16b,v5.16b,v6.16b 1253 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1254 add v2.2d,v4.2d,v0.2d // "D + T1" 1255 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1256 add v25.2d,v25.2d,v23.2d 1257 ld1 {v24.2d},[x3],#16 1258 ext v25.16b,v25.16b,v25.16b,#8 1259 ext v5.16b,v2.16b,v3.16b,#8 1260 ext v6.16b,v1.16b,v2.16b,#8 1261 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1262 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1263 ext v7.16b,v19.16b,v20.16b,#8 1264 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1265 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1266 add v4.2d,v1.2d,v3.2d // "D + T1" 1267 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1268 add v24.2d,v24.2d,v16.2d 1269 ld1 {v25.2d},[x3],#16 1270 ext v24.16b,v24.16b,v24.16b,#8 1271 ext v5.16b,v4.16b,v2.16b,#8 1272 ext v6.16b,v0.16b,v4.16b,#8 1273 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1274 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1275 ext v7.16b,v20.16b,v21.16b,#8 1276 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1277 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1278 add v1.2d,v0.2d,v2.2d // "D + T1" 1279 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1280 add v25.2d,v25.2d,v17.2d 1281 ld1 {v24.2d},[x3],#16 1282 ext v25.16b,v25.16b,v25.16b,#8 1283 ext v5.16b,v1.16b,v4.16b,#8 1284 ext v6.16b,v3.16b,v1.16b,#8 1285 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1286 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1287 ext v7.16b,v21.16b,v22.16b,#8 1288 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1289 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1290 add v0.2d,v3.2d,v4.2d // "D + T1" 1291 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1292 add v24.2d,v24.2d,v18.2d 1293 ld1 {v25.2d},[x3],#16 1294 ext v24.16b,v24.16b,v24.16b,#8 1295 ext v5.16b,v0.16b,v1.16b,#8 1296 ext v6.16b,v2.16b,v0.16b,#8 1297 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1298 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1299 ext v7.16b,v22.16b,v23.16b,#8 1300 .inst 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1301 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1302 add v3.2d,v2.2d,v1.2d // "D + T1" 1303 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1304 add v25.2d,v25.2d,v19.2d 1305 ld1 {v24.2d},[x3],#16 1306 ext v25.16b,v25.16b,v25.16b,#8 1307 ext v5.16b,v3.16b,v0.16b,#8 1308 ext v6.16b,v4.16b,v3.16b,#8 1309 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1310 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1311 ext v7.16b,v23.16b,v16.16b,#8 1312 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1313 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1314 add v2.2d,v4.2d,v0.2d // "D + T1" 1315 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1316 add v24.2d,v24.2d,v20.2d 1317 ld1 {v25.2d},[x3],#16 1318 ext v24.16b,v24.16b,v24.16b,#8 1319 ext v5.16b,v2.16b,v3.16b,#8 1320 ext v6.16b,v1.16b,v2.16b,#8 1321 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1322 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1323 ext v7.16b,v16.16b,v17.16b,#8 1324 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1325 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1326 add v4.2d,v1.2d,v3.2d // "D + T1" 1327 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1328 add v25.2d,v25.2d,v21.2d 1329 ld1 {v24.2d},[x3],#16 1330 ext v25.16b,v25.16b,v25.16b,#8 1331 ext v5.16b,v4.16b,v2.16b,#8 1332 ext v6.16b,v0.16b,v4.16b,#8 1333 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1334 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1335 ext v7.16b,v17.16b,v18.16b,#8 1336 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1337 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1338 add v1.2d,v0.2d,v2.2d // "D + T1" 1339 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1340 add v24.2d,v24.2d,v22.2d 1341 ld1 {v25.2d},[x3],#16 1342 ext v24.16b,v24.16b,v24.16b,#8 1343 ext v5.16b,v1.16b,v4.16b,#8 1344 ext v6.16b,v3.16b,v1.16b,#8 1345 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1346 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1347 ext v7.16b,v18.16b,v19.16b,#8 1348 .inst 0xce6680a4 //sha512h 
v4.16b,v5.16b,v6.16b 1349 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1350 add v0.2d,v3.2d,v4.2d // "D + T1" 1351 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1352 add v25.2d,v25.2d,v23.2d 1353 ld1 {v24.2d},[x3],#16 1354 ext v25.16b,v25.16b,v25.16b,#8 1355 ext v5.16b,v0.16b,v1.16b,#8 1356 ext v6.16b,v2.16b,v0.16b,#8 1357 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1358 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1359 ext v7.16b,v19.16b,v20.16b,#8 1360 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1361 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1362 add v3.2d,v2.2d,v1.2d // "D + T1" 1363 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1364 add v24.2d,v24.2d,v16.2d 1365 ld1 {v25.2d},[x3],#16 1366 ext v24.16b,v24.16b,v24.16b,#8 1367 ext v5.16b,v3.16b,v0.16b,#8 1368 ext v6.16b,v4.16b,v3.16b,#8 1369 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1370 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1371 ext v7.16b,v20.16b,v21.16b,#8 1372 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1373 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1374 add v2.2d,v4.2d,v0.2d // "D + T1" 1375 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1376 add v25.2d,v25.2d,v17.2d 1377 ld1 {v24.2d},[x3],#16 1378 ext v25.16b,v25.16b,v25.16b,#8 1379 ext v5.16b,v2.16b,v3.16b,#8 1380 ext v6.16b,v1.16b,v2.16b,#8 1381 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1382 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1383 ext v7.16b,v21.16b,v22.16b,#8 1384 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1385 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1386 add v4.2d,v1.2d,v3.2d // "D + T1" 1387 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1388 add v24.2d,v24.2d,v18.2d 1389 ld1 {v25.2d},[x3],#16 1390 ext v24.16b,v24.16b,v24.16b,#8 1391 ext v5.16b,v4.16b,v2.16b,#8 1392 ext v6.16b,v0.16b,v4.16b,#8 1393 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1394 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1395 ext v7.16b,v22.16b,v23.16b,#8 1396 .inst 0xce6680a2 //sha512h 
v2.16b,v5.16b,v6.16b 1397 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1398 add v1.2d,v0.2d,v2.2d // "D + T1" 1399 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1400 add v25.2d,v25.2d,v19.2d 1401 ld1 {v24.2d},[x3],#16 1402 ext v25.16b,v25.16b,v25.16b,#8 1403 ext v5.16b,v1.16b,v4.16b,#8 1404 ext v6.16b,v3.16b,v1.16b,#8 1405 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1406 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1407 ext v7.16b,v23.16b,v16.16b,#8 1408 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1409 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1410 add v0.2d,v3.2d,v4.2d // "D + T1" 1411 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1412 add v24.2d,v24.2d,v20.2d 1413 ld1 {v25.2d},[x3],#16 1414 ext v24.16b,v24.16b,v24.16b,#8 1415 ext v5.16b,v0.16b,v1.16b,#8 1416 ext v6.16b,v2.16b,v0.16b,#8 1417 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1418 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1419 ext v7.16b,v16.16b,v17.16b,#8 1420 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1421 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1422 add v3.2d,v2.2d,v1.2d // "D + T1" 1423 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1424 add v25.2d,v25.2d,v21.2d 1425 ld1 {v24.2d},[x3],#16 1426 ext v25.16b,v25.16b,v25.16b,#8 1427 ext v5.16b,v3.16b,v0.16b,#8 1428 ext v6.16b,v4.16b,v3.16b,#8 1429 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1430 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1431 ext v7.16b,v17.16b,v18.16b,#8 1432 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1433 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1434 add v2.2d,v4.2d,v0.2d // "D + T1" 1435 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1436 add v24.2d,v24.2d,v22.2d 1437 ld1 {v25.2d},[x3],#16 1438 ext v24.16b,v24.16b,v24.16b,#8 1439 ext v5.16b,v2.16b,v3.16b,#8 1440 ext v6.16b,v1.16b,v2.16b,#8 1441 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1442 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1443 ext v7.16b,v18.16b,v19.16b,#8 1444 .inst 0xce6680a3 //sha512h 
v3.16b,v5.16b,v6.16b 1445 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1446 add v4.2d,v1.2d,v3.2d // "D + T1" 1447 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1448 add v25.2d,v25.2d,v23.2d 1449 ld1 {v24.2d},[x3],#16 1450 ext v25.16b,v25.16b,v25.16b,#8 1451 ext v5.16b,v4.16b,v2.16b,#8 1452 ext v6.16b,v0.16b,v4.16b,#8 1453 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1454 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1455 ext v7.16b,v19.16b,v20.16b,#8 1456 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1457 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1458 add v1.2d,v0.2d,v2.2d // "D + T1" 1459 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1460 ld1 {v25.2d},[x3],#16 1461 add v24.2d,v24.2d,v16.2d 1462 ld1 {v16.16b},[x1],#16 // load next input 1463 ext v24.16b,v24.16b,v24.16b,#8 1464 ext v5.16b,v1.16b,v4.16b,#8 1465 ext v6.16b,v3.16b,v1.16b,#8 1466 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1467 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1468 rev64 v16.16b,v16.16b 1469 add v0.2d,v3.2d,v4.2d // "D + T1" 1470 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1471 ld1 {v24.2d},[x3],#16 1472 add v25.2d,v25.2d,v17.2d 1473 ld1 {v17.16b},[x1],#16 // load next input 1474 ext v25.16b,v25.16b,v25.16b,#8 1475 ext v5.16b,v0.16b,v1.16b,#8 1476 ext v6.16b,v2.16b,v0.16b,#8 1477 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1478 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1479 rev64 v17.16b,v17.16b 1480 add v3.2d,v2.2d,v1.2d // "D + T1" 1481 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1482 ld1 {v25.2d},[x3],#16 1483 add v24.2d,v24.2d,v18.2d 1484 ld1 {v18.16b},[x1],#16 // load next input 1485 ext v24.16b,v24.16b,v24.16b,#8 1486 ext v5.16b,v3.16b,v0.16b,#8 1487 ext v6.16b,v4.16b,v3.16b,#8 1488 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1489 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1490 rev64 v18.16b,v18.16b 1491 add v2.2d,v4.2d,v0.2d // "D + T1" 1492 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1493 ld1 {v24.2d},[x3],#16 1494 add 
v25.2d,v25.2d,v19.2d 1495 ld1 {v19.16b},[x1],#16 // load next input 1496 ext v25.16b,v25.16b,v25.16b,#8 1497 ext v5.16b,v2.16b,v3.16b,#8 1498 ext v6.16b,v1.16b,v2.16b,#8 1499 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1500 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1501 rev64 v19.16b,v19.16b 1502 add v4.2d,v1.2d,v3.2d // "D + T1" 1503 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1504 ld1 {v25.2d},[x3],#16 1505 add v24.2d,v24.2d,v20.2d 1506 ld1 {v20.16b},[x1],#16 // load next input 1507 ext v24.16b,v24.16b,v24.16b,#8 1508 ext v5.16b,v4.16b,v2.16b,#8 1509 ext v6.16b,v0.16b,v4.16b,#8 1510 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1511 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1512 rev64 v20.16b,v20.16b 1513 add v1.2d,v0.2d,v2.2d // "D + T1" 1514 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1515 ld1 {v24.2d},[x3],#16 1516 add v25.2d,v25.2d,v21.2d 1517 ld1 {v21.16b},[x1],#16 // load next input 1518 ext v25.16b,v25.16b,v25.16b,#8 1519 ext v5.16b,v1.16b,v4.16b,#8 1520 ext v6.16b,v3.16b,v1.16b,#8 1521 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1522 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1523 rev64 v21.16b,v21.16b 1524 add v0.2d,v3.2d,v4.2d // "D + T1" 1525 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1526 ld1 {v25.2d},[x3],#16 1527 add v24.2d,v24.2d,v22.2d 1528 ld1 {v22.16b},[x1],#16 // load next input 1529 ext v24.16b,v24.16b,v24.16b,#8 1530 ext v5.16b,v0.16b,v1.16b,#8 1531 ext v6.16b,v2.16b,v0.16b,#8 1532 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1533 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1534 rev64 v22.16b,v22.16b 1535 add v3.2d,v2.2d,v1.2d // "D + T1" 1536 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1537 sub x3,x3,#80*8 // rewind 1538 add v25.2d,v25.2d,v23.2d 1539 ld1 {v23.16b},[x1],#16 // load next input 1540 ext v25.16b,v25.16b,v25.16b,#8 1541 ext v5.16b,v3.16b,v0.16b,#8 1542 ext v6.16b,v4.16b,v3.16b,#8 1543 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1544 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1545 
rev64 v23.16b,v23.16b 1546 add v2.2d,v4.2d,v0.2d // "D + T1" 1547 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1548 add v0.2d,v0.2d,v26.2d // accumulate 1549 add v1.2d,v1.2d,v27.2d 1550 add v2.2d,v2.2d,v28.2d 1551 add v3.2d,v3.2d,v29.2d 1552 1553 cbnz x2,.Loop_hw 1554 1555 st1 {v0.2d-v3.2d},[x0] // store context 1556 1557 ldr x29,[sp],#16 1558 ret 1559.size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8 1560#endif 1561