/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
 */

#if defined(__aarch64__)

.text

/*
 * .LK512 - the 80 SHA-512 round constants K[0..79], followed by one zero
 * quadword. The scalar code below walks this table with a post-incrementing
 * pointer and leaves its round loop when it fetches the zero word, so the
 * terminator must stay directly behind the last constant.
 * The table lives in .text because both block functions address it with a
 * PC-relative "adr".
 */
.align	6
.type	.LK512,%object
.LK512:
	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
	.quad	0x3956c25bf348b538,0x59f111f1b605d019
	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
	.quad	0xd807aa98a3030242,0x12835b0145706fbe
	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
	.quad	0x06ca6351e003826f,0x142929670a0e6e70
	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
	.quad	0x81c2c92e47edaee6,0x92722c851482353b
	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
	.quad	0xd192e819d6ef5218,0xd69906245565a910
	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
	.quad	0x90befffa23631e28,0xa4506cebde82bde9
	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
	.quad	0xca273eceea26619c,0xd186b8c721c0c207
	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
	.quad	0x113f9804bef90dae,0x1b710b35131c471b
	.quad	0x28db77f523047d84,0x32caab7b40c72493
	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
	.quad	0	// terminator
.size	.LK512,.-.LK512

/*
 * zfs_sha512_block_armv7 - SHA-512 block function written with
 * general-purpose AArch64 integer instructions only (no SIMD or crypto
 * extension instructions are used in this routine).
 *
 * In:
 *   x0 = pointer to the eight 64-bit state words; they are loaded on entry
 *        and written back (old value + working variable) after every block
 *   x1 = input pointer; 128 bytes are consumed per block
 *   x2 = number of 128-byte blocks (scaled by 128 to form the end pointer)
 * Out:
 *   nothing in registers; the state at x0 is updated in place
 *
 * NOTE(review): presumably declared in C along the lines of
 *   void zfs_sha512_block_armv7(uint64_t state[8], const void *in,
 *       size_t blocks);
 * confirm against the actual prototype.
 * NOTE(review): the block body runs before the end-of-input comparison, so
 * a block count of 0 is not handled here; callers appear to be expected to
 * pass at least one block. Confirm at the call sites.
 *
 * Register roles inside the routine:
 *   x20-x27      working variables a..h; the role of each register rotates
 *                by one position every round instead of moving the values
 *   x3-x15,x0-x2 the sixteen message schedule words X[0..15] (x0-x2 are
 *                reused for this once the arguments have been saved)
 *   x16,x17      scratch for Sigma1/Ch and Sigma0
 *   x19,x28      alternate between the current K[i] and the a^b value that
 *                is carried into the next round as b^c for Maj
 *   x30          pointer into .LK512 (the return address is kept in the
 *                frame and reloaded in the epilogue)
 *
 * Stack layout (x29 = frame base, frame size 128 bytes):
 *   [x29,#0]     saved x29,x30
 *   [x29,#16..]  saved x19-x28
 *   [x29,#96]    x0 (state pointer), [x29,#104] end-of-input pointer
 *   [x29,#112]   input pointer, captured after the first 16 bytes of the
 *                block have been read
 *   [sp,#0..24]  32 bytes below the frame: spill slots for X[] words whose
 *                registers are temporarily borrowed as scratch
 */
.globl	zfs_sha512_block_armv7
.type	zfs_sha512_block_armv7,%function
.align	6
zfs_sha512_block_armv7:
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	// save the callee-saved registers this routine overwrites
	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8	// four 8-byte spill slots for X[]

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adr	x30,.LK512
	stp	x0,x2,[x29,#96]	// x0 and x2 are reused as X[] registers below

/*
 * One pass of .Loop processes one 128-byte block.
 * Rounds 0-15 take X[i] straight from the input; on builds without
 * __AARCH64EB__ each word is byte-reversed first. In rounds 0-14 the
 * "h+=Sigma0(a)" addition is issued at the start of the following round;
 * the commented-out add at the end of each round marks where it logically
 * belongs.
 */
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]	// x1 becomes X[14] later; keep the pointer in the frame
#ifndef	__AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	str	x6,[sp,#24]	// spill X[3]; x6 is used as scratch below
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]	// spill X[4]; x7 is used as scratch below
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x0,x0	// 13
#endif
	ldp	x1,x2,[x1]	// last two input words; x1 stops being a pointer here
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]	// spill X[5]; x8 is used as scratch below
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]	// reload X[3]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]	// spill X[6]; x9 is used as scratch below
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef	__AARCH64EB__
	rev	x2,x2	// 15
#endif
	// Round 15 also computes the first expanded schedule word (into x3),
	// using the same interleaving as the rounds in .Loop_16_xx.
	ldr	x7,[sp,#0]	// reload X[4]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]	// spill X[7]; x10 is used as scratch below
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
/*
 * Each pass of .Loop_16_xx performs 16 rounds. Every round consumes the
 * X[] word prepared by the previous round and, interleaved with the
 * compression step, builds the next one as
 *   X[i] += X[i+9] + sigma0(X[i+1]) + sigma1(X[i+14])   (indices mod 16).
 * Each round starts by reloading one spilled X[] word and spilling another,
 * because the register of the latter is borrowed as scratch.
 * The K word for the next round is fetched at the end of each round; the
 * pass repeats until that word is the zero terminator of .LK512.
 */
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	cbnz	x19,.Loop_16_xx	// zero word = end of .LK512, all rounds done

	// Block finished: recover the saved arguments and fold the working
	// variables back into the state.
	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind: 81 quadwords were read (80 K values + terminator)

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2	// reached the end-of-input pointer?
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	// restore callee-saved registers, drop the spill area and the frame
	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	ret
.size	zfs_sha512_block_armv7,.-zfs_sha512_block_armv7


.globl	zfs_sha512_block_armv8
.type	zfs_sha512_block_armv8,%function
.align	6
zfs_sha512_block_armv8:
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
1046 add x29,sp,#0 1047 1048 ld1 {v16.16b-v19.16b},[x1],#64 // load input 1049 ld1 {v20.16b-v23.16b},[x1],#64 1050 1051 ld1 {v0.2d-v3.2d},[x0] // load context 1052 adr x3,.LK512 1053 1054 rev64 v16.16b,v16.16b 1055 rev64 v17.16b,v17.16b 1056 rev64 v18.16b,v18.16b 1057 rev64 v19.16b,v19.16b 1058 rev64 v20.16b,v20.16b 1059 rev64 v21.16b,v21.16b 1060 rev64 v22.16b,v22.16b 1061 rev64 v23.16b,v23.16b 1062 b .Loop_hw 1063 1064.align 4 1065.Loop_hw: 1066 ld1 {v24.2d},[x3],#16 1067 subs x2,x2,#1 1068 sub x4,x1,#128 1069 orr v26.16b,v0.16b,v0.16b // offload 1070 orr v27.16b,v1.16b,v1.16b 1071 orr v28.16b,v2.16b,v2.16b 1072 orr v29.16b,v3.16b,v3.16b 1073 csel x1,x1,x4,ne // conditional rewind 1074 add v24.2d,v24.2d,v16.2d 1075 ld1 {v25.2d},[x3],#16 1076 ext v24.16b,v24.16b,v24.16b,#8 1077 ext v5.16b,v2.16b,v3.16b,#8 1078 ext v6.16b,v1.16b,v2.16b,#8 1079 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1080 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1081 ext v7.16b,v20.16b,v21.16b,#8 1082 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1083 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1084 add v4.2d,v1.2d,v3.2d // "D + T1" 1085 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1086 add v25.2d,v25.2d,v17.2d 1087 ld1 {v24.2d},[x3],#16 1088 ext v25.16b,v25.16b,v25.16b,#8 1089 ext v5.16b,v4.16b,v2.16b,#8 1090 ext v6.16b,v0.16b,v4.16b,#8 1091 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1092 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1093 ext v7.16b,v21.16b,v22.16b,#8 1094 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1095 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1096 add v1.2d,v0.2d,v2.2d // "D + T1" 1097 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1098 add v24.2d,v24.2d,v18.2d 1099 ld1 {v25.2d},[x3],#16 1100 ext v24.16b,v24.16b,v24.16b,#8 1101 ext v5.16b,v1.16b,v4.16b,#8 1102 ext v6.16b,v3.16b,v1.16b,#8 1103 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1104 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1105 ext v7.16b,v22.16b,v23.16b,#8 1106 .inst 
0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1107 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1108 add v0.2d,v3.2d,v4.2d // "D + T1" 1109 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1110 add v25.2d,v25.2d,v19.2d 1111 ld1 {v24.2d},[x3],#16 1112 ext v25.16b,v25.16b,v25.16b,#8 1113 ext v5.16b,v0.16b,v1.16b,#8 1114 ext v6.16b,v2.16b,v0.16b,#8 1115 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1116 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1117 ext v7.16b,v23.16b,v16.16b,#8 1118 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1119 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1120 add v3.2d,v2.2d,v1.2d // "D + T1" 1121 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1122 add v24.2d,v24.2d,v20.2d 1123 ld1 {v25.2d},[x3],#16 1124 ext v24.16b,v24.16b,v24.16b,#8 1125 ext v5.16b,v3.16b,v0.16b,#8 1126 ext v6.16b,v4.16b,v3.16b,#8 1127 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1128 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1129 ext v7.16b,v16.16b,v17.16b,#8 1130 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1131 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1132 add v2.2d,v4.2d,v0.2d // "D + T1" 1133 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1134 add v25.2d,v25.2d,v21.2d 1135 ld1 {v24.2d},[x3],#16 1136 ext v25.16b,v25.16b,v25.16b,#8 1137 ext v5.16b,v2.16b,v3.16b,#8 1138 ext v6.16b,v1.16b,v2.16b,#8 1139 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1140 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1141 ext v7.16b,v17.16b,v18.16b,#8 1142 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1143 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1144 add v4.2d,v1.2d,v3.2d // "D + T1" 1145 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1146 add v24.2d,v24.2d,v22.2d 1147 ld1 {v25.2d},[x3],#16 1148 ext v24.16b,v24.16b,v24.16b,#8 1149 ext v5.16b,v4.16b,v2.16b,#8 1150 ext v6.16b,v0.16b,v4.16b,#8 1151 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1152 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1153 ext v7.16b,v18.16b,v19.16b,#8 1154 .inst 0xce6680a2 
//sha512h v2.16b,v5.16b,v6.16b 1155 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1156 add v1.2d,v0.2d,v2.2d // "D + T1" 1157 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1158 add v25.2d,v25.2d,v23.2d 1159 ld1 {v24.2d},[x3],#16 1160 ext v25.16b,v25.16b,v25.16b,#8 1161 ext v5.16b,v1.16b,v4.16b,#8 1162 ext v6.16b,v3.16b,v1.16b,#8 1163 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1164 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1165 ext v7.16b,v19.16b,v20.16b,#8 1166 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1167 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1168 add v0.2d,v3.2d,v4.2d // "D + T1" 1169 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1170 add v24.2d,v24.2d,v16.2d 1171 ld1 {v25.2d},[x3],#16 1172 ext v24.16b,v24.16b,v24.16b,#8 1173 ext v5.16b,v0.16b,v1.16b,#8 1174 ext v6.16b,v2.16b,v0.16b,#8 1175 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1176 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1177 ext v7.16b,v20.16b,v21.16b,#8 1178 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1179 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1180 add v3.2d,v2.2d,v1.2d // "D + T1" 1181 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1182 add v25.2d,v25.2d,v17.2d 1183 ld1 {v24.2d},[x3],#16 1184 ext v25.16b,v25.16b,v25.16b,#8 1185 ext v5.16b,v3.16b,v0.16b,#8 1186 ext v6.16b,v4.16b,v3.16b,#8 1187 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1188 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1189 ext v7.16b,v21.16b,v22.16b,#8 1190 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1191 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1192 add v2.2d,v4.2d,v0.2d // "D + T1" 1193 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1194 add v24.2d,v24.2d,v18.2d 1195 ld1 {v25.2d},[x3],#16 1196 ext v24.16b,v24.16b,v24.16b,#8 1197 ext v5.16b,v2.16b,v3.16b,#8 1198 ext v6.16b,v1.16b,v2.16b,#8 1199 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1200 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1201 ext v7.16b,v22.16b,v23.16b,#8 1202 .inst 0xce6680a3 //sha512h 
v3.16b,v5.16b,v6.16b 1203 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1204 add v4.2d,v1.2d,v3.2d // "D + T1" 1205 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1206 add v25.2d,v25.2d,v19.2d 1207 ld1 {v24.2d},[x3],#16 1208 ext v25.16b,v25.16b,v25.16b,#8 1209 ext v5.16b,v4.16b,v2.16b,#8 1210 ext v6.16b,v0.16b,v4.16b,#8 1211 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1212 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1213 ext v7.16b,v23.16b,v16.16b,#8 1214 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1215 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1216 add v1.2d,v0.2d,v2.2d // "D + T1" 1217 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1218 add v24.2d,v24.2d,v20.2d 1219 ld1 {v25.2d},[x3],#16 1220 ext v24.16b,v24.16b,v24.16b,#8 1221 ext v5.16b,v1.16b,v4.16b,#8 1222 ext v6.16b,v3.16b,v1.16b,#8 1223 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1224 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1225 ext v7.16b,v16.16b,v17.16b,#8 1226 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1227 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1228 add v0.2d,v3.2d,v4.2d // "D + T1" 1229 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1230 add v25.2d,v25.2d,v21.2d 1231 ld1 {v24.2d},[x3],#16 1232 ext v25.16b,v25.16b,v25.16b,#8 1233 ext v5.16b,v0.16b,v1.16b,#8 1234 ext v6.16b,v2.16b,v0.16b,#8 1235 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1236 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1237 ext v7.16b,v17.16b,v18.16b,#8 1238 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1239 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1240 add v3.2d,v2.2d,v1.2d // "D + T1" 1241 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1242 add v24.2d,v24.2d,v22.2d 1243 ld1 {v25.2d},[x3],#16 1244 ext v24.16b,v24.16b,v24.16b,#8 1245 ext v5.16b,v3.16b,v0.16b,#8 1246 ext v6.16b,v4.16b,v3.16b,#8 1247 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1248 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1249 ext v7.16b,v18.16b,v19.16b,#8 1250 .inst 0xce6680a0 //sha512h 
v0.16b,v5.16b,v6.16b 1251 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1252 add v2.2d,v4.2d,v0.2d // "D + T1" 1253 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1254 add v25.2d,v25.2d,v23.2d 1255 ld1 {v24.2d},[x3],#16 1256 ext v25.16b,v25.16b,v25.16b,#8 1257 ext v5.16b,v2.16b,v3.16b,#8 1258 ext v6.16b,v1.16b,v2.16b,#8 1259 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1260 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1261 ext v7.16b,v19.16b,v20.16b,#8 1262 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1263 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1264 add v4.2d,v1.2d,v3.2d // "D + T1" 1265 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1266 add v24.2d,v24.2d,v16.2d 1267 ld1 {v25.2d},[x3],#16 1268 ext v24.16b,v24.16b,v24.16b,#8 1269 ext v5.16b,v4.16b,v2.16b,#8 1270 ext v6.16b,v0.16b,v4.16b,#8 1271 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1272 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1273 ext v7.16b,v20.16b,v21.16b,#8 1274 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1275 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1276 add v1.2d,v0.2d,v2.2d // "D + T1" 1277 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1278 add v25.2d,v25.2d,v17.2d 1279 ld1 {v24.2d},[x3],#16 1280 ext v25.16b,v25.16b,v25.16b,#8 1281 ext v5.16b,v1.16b,v4.16b,#8 1282 ext v6.16b,v3.16b,v1.16b,#8 1283 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1284 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1285 ext v7.16b,v21.16b,v22.16b,#8 1286 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1287 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1288 add v0.2d,v3.2d,v4.2d // "D + T1" 1289 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1290 add v24.2d,v24.2d,v18.2d 1291 ld1 {v25.2d},[x3],#16 1292 ext v24.16b,v24.16b,v24.16b,#8 1293 ext v5.16b,v0.16b,v1.16b,#8 1294 ext v6.16b,v2.16b,v0.16b,#8 1295 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1296 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1297 ext v7.16b,v22.16b,v23.16b,#8 1298 .inst 0xce6680a1 //sha512h 
v1.16b,v5.16b,v6.16b 1299 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1300 add v3.2d,v2.2d,v1.2d // "D + T1" 1301 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1302 add v25.2d,v25.2d,v19.2d 1303 ld1 {v24.2d},[x3],#16 1304 ext v25.16b,v25.16b,v25.16b,#8 1305 ext v5.16b,v3.16b,v0.16b,#8 1306 ext v6.16b,v4.16b,v3.16b,#8 1307 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1308 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1309 ext v7.16b,v23.16b,v16.16b,#8 1310 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1311 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1312 add v2.2d,v4.2d,v0.2d // "D + T1" 1313 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1314 add v24.2d,v24.2d,v20.2d 1315 ld1 {v25.2d},[x3],#16 1316 ext v24.16b,v24.16b,v24.16b,#8 1317 ext v5.16b,v2.16b,v3.16b,#8 1318 ext v6.16b,v1.16b,v2.16b,#8 1319 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1320 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1321 ext v7.16b,v16.16b,v17.16b,#8 1322 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1323 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1324 add v4.2d,v1.2d,v3.2d // "D + T1" 1325 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1326 add v25.2d,v25.2d,v21.2d 1327 ld1 {v24.2d},[x3],#16 1328 ext v25.16b,v25.16b,v25.16b,#8 1329 ext v5.16b,v4.16b,v2.16b,#8 1330 ext v6.16b,v0.16b,v4.16b,#8 1331 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1332 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1333 ext v7.16b,v17.16b,v18.16b,#8 1334 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1335 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1336 add v1.2d,v0.2d,v2.2d // "D + T1" 1337 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1338 add v24.2d,v24.2d,v22.2d 1339 ld1 {v25.2d},[x3],#16 1340 ext v24.16b,v24.16b,v24.16b,#8 1341 ext v5.16b,v1.16b,v4.16b,#8 1342 ext v6.16b,v3.16b,v1.16b,#8 1343 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1344 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1345 ext v7.16b,v18.16b,v19.16b,#8 1346 .inst 0xce6680a4 //sha512h 
v4.16b,v5.16b,v6.16b 1347 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1348 add v0.2d,v3.2d,v4.2d // "D + T1" 1349 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1350 add v25.2d,v25.2d,v23.2d 1351 ld1 {v24.2d},[x3],#16 1352 ext v25.16b,v25.16b,v25.16b,#8 1353 ext v5.16b,v0.16b,v1.16b,#8 1354 ext v6.16b,v2.16b,v0.16b,#8 1355 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1356 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1357 ext v7.16b,v19.16b,v20.16b,#8 1358 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1359 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1360 add v3.2d,v2.2d,v1.2d // "D + T1" 1361 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1362 add v24.2d,v24.2d,v16.2d 1363 ld1 {v25.2d},[x3],#16 1364 ext v24.16b,v24.16b,v24.16b,#8 1365 ext v5.16b,v3.16b,v0.16b,#8 1366 ext v6.16b,v4.16b,v3.16b,#8 1367 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1368 .inst 0xcec08230 //sha512su0 v16.16b,v17.16b 1369 ext v7.16b,v20.16b,v21.16b,#8 1370 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1371 .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b 1372 add v2.2d,v4.2d,v0.2d // "D + T1" 1373 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1374 add v25.2d,v25.2d,v17.2d 1375 ld1 {v24.2d},[x3],#16 1376 ext v25.16b,v25.16b,v25.16b,#8 1377 ext v5.16b,v2.16b,v3.16b,#8 1378 ext v6.16b,v1.16b,v2.16b,#8 1379 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1380 .inst 0xcec08251 //sha512su0 v17.16b,v18.16b 1381 ext v7.16b,v21.16b,v22.16b,#8 1382 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1383 .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b 1384 add v4.2d,v1.2d,v3.2d // "D + T1" 1385 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1386 add v24.2d,v24.2d,v18.2d 1387 ld1 {v25.2d},[x3],#16 1388 ext v24.16b,v24.16b,v24.16b,#8 1389 ext v5.16b,v4.16b,v2.16b,#8 1390 ext v6.16b,v0.16b,v4.16b,#8 1391 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1392 .inst 0xcec08272 //sha512su0 v18.16b,v19.16b 1393 ext v7.16b,v22.16b,v23.16b,#8 1394 .inst 0xce6680a2 //sha512h 
v2.16b,v5.16b,v6.16b 1395 .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b 1396 add v1.2d,v0.2d,v2.2d // "D + T1" 1397 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1398 add v25.2d,v25.2d,v19.2d 1399 ld1 {v24.2d},[x3],#16 1400 ext v25.16b,v25.16b,v25.16b,#8 1401 ext v5.16b,v1.16b,v4.16b,#8 1402 ext v6.16b,v3.16b,v1.16b,#8 1403 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1404 .inst 0xcec08293 //sha512su0 v19.16b,v20.16b 1405 ext v7.16b,v23.16b,v16.16b,#8 1406 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1407 .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b 1408 add v0.2d,v3.2d,v4.2d // "D + T1" 1409 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1410 add v24.2d,v24.2d,v20.2d 1411 ld1 {v25.2d},[x3],#16 1412 ext v24.16b,v24.16b,v24.16b,#8 1413 ext v5.16b,v0.16b,v1.16b,#8 1414 ext v6.16b,v2.16b,v0.16b,#8 1415 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1416 .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b 1417 ext v7.16b,v16.16b,v17.16b,#8 1418 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1419 .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b 1420 add v3.2d,v2.2d,v1.2d // "D + T1" 1421 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1422 add v25.2d,v25.2d,v21.2d 1423 ld1 {v24.2d},[x3],#16 1424 ext v25.16b,v25.16b,v25.16b,#8 1425 ext v5.16b,v3.16b,v0.16b,#8 1426 ext v6.16b,v4.16b,v3.16b,#8 1427 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1428 .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b 1429 ext v7.16b,v17.16b,v18.16b,#8 1430 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1431 .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b 1432 add v2.2d,v4.2d,v0.2d // "D + T1" 1433 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1434 add v24.2d,v24.2d,v22.2d 1435 ld1 {v25.2d},[x3],#16 1436 ext v24.16b,v24.16b,v24.16b,#8 1437 ext v5.16b,v2.16b,v3.16b,#8 1438 ext v6.16b,v1.16b,v2.16b,#8 1439 add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" 1440 .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b 1441 ext v7.16b,v18.16b,v19.16b,#8 1442 .inst 0xce6680a3 //sha512h 
v3.16b,v5.16b,v6.16b 1443 .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b 1444 add v4.2d,v1.2d,v3.2d // "D + T1" 1445 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1446 add v25.2d,v25.2d,v23.2d 1447 ld1 {v24.2d},[x3],#16 1448 ext v25.16b,v25.16b,v25.16b,#8 1449 ext v5.16b,v4.16b,v2.16b,#8 1450 ext v6.16b,v0.16b,v4.16b,#8 1451 add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" 1452 .inst 0xcec08217 //sha512su0 v23.16b,v16.16b 1453 ext v7.16b,v19.16b,v20.16b,#8 1454 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1455 .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b 1456 add v1.2d,v0.2d,v2.2d // "D + T1" 1457 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1458 ld1 {v25.2d},[x3],#16 1459 add v24.2d,v24.2d,v16.2d 1460 ld1 {v16.16b},[x1],#16 // load next input 1461 ext v24.16b,v24.16b,v24.16b,#8 1462 ext v5.16b,v1.16b,v4.16b,#8 1463 ext v6.16b,v3.16b,v1.16b,#8 1464 add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" 1465 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1466 rev64 v16.16b,v16.16b 1467 add v0.2d,v3.2d,v4.2d // "D + T1" 1468 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1469 ld1 {v24.2d},[x3],#16 1470 add v25.2d,v25.2d,v17.2d 1471 ld1 {v17.16b},[x1],#16 // load next input 1472 ext v25.16b,v25.16b,v25.16b,#8 1473 ext v5.16b,v0.16b,v1.16b,#8 1474 ext v6.16b,v2.16b,v0.16b,#8 1475 add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" 1476 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1477 rev64 v17.16b,v17.16b 1478 add v3.2d,v2.2d,v1.2d // "D + T1" 1479 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1480 ld1 {v25.2d},[x3],#16 1481 add v24.2d,v24.2d,v18.2d 1482 ld1 {v18.16b},[x1],#16 // load next input 1483 ext v24.16b,v24.16b,v24.16b,#8 1484 ext v5.16b,v3.16b,v0.16b,#8 1485 ext v6.16b,v4.16b,v3.16b,#8 1486 add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" 1487 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1488 rev64 v18.16b,v18.16b 1489 add v2.2d,v4.2d,v0.2d // "D + T1" 1490 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1491 ld1 {v24.2d},[x3],#16 1492 add 
v25.2d,v25.2d,v19.2d 1493 ld1 {v19.16b},[x1],#16 // load next input 1494 ext v25.16b,v25.16b,v25.16b,#8 1495 ext v5.16b,v2.16b,v3.16b,#8 1496 ext v6.16b,v1.16b,v2.16b,#8 1497 add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" 1498 .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b 1499 rev64 v19.16b,v19.16b 1500 add v4.2d,v1.2d,v3.2d // "D + T1" 1501 .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b 1502 ld1 {v25.2d},[x3],#16 1503 add v24.2d,v24.2d,v20.2d 1504 ld1 {v20.16b},[x1],#16 // load next input 1505 ext v24.16b,v24.16b,v24.16b,#8 1506 ext v5.16b,v4.16b,v2.16b,#8 1507 ext v6.16b,v0.16b,v4.16b,#8 1508 add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" 1509 .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b 1510 rev64 v20.16b,v20.16b 1511 add v1.2d,v0.2d,v2.2d // "D + T1" 1512 .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b 1513 ld1 {v24.2d},[x3],#16 1514 add v25.2d,v25.2d,v21.2d 1515 ld1 {v21.16b},[x1],#16 // load next input 1516 ext v25.16b,v25.16b,v25.16b,#8 1517 ext v5.16b,v1.16b,v4.16b,#8 1518 ext v6.16b,v3.16b,v1.16b,#8 1519 add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" 1520 .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b 1521 rev64 v21.16b,v21.16b 1522 add v0.2d,v3.2d,v4.2d // "D + T1" 1523 .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b 1524 ld1 {v25.2d},[x3],#16 1525 add v24.2d,v24.2d,v22.2d 1526 ld1 {v22.16b},[x1],#16 // load next input 1527 ext v24.16b,v24.16b,v24.16b,#8 1528 ext v5.16b,v0.16b,v1.16b,#8 1529 ext v6.16b,v2.16b,v0.16b,#8 1530 add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" 1531 .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b 1532 rev64 v22.16b,v22.16b 1533 add v3.2d,v2.2d,v1.2d // "D + T1" 1534 .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b 1535 sub x3,x3,#80*8 // rewind 1536 add v25.2d,v25.2d,v23.2d 1537 ld1 {v23.16b},[x1],#16 // load next input 1538 ext v25.16b,v25.16b,v25.16b,#8 1539 ext v5.16b,v3.16b,v0.16b,#8 1540 ext v6.16b,v4.16b,v3.16b,#8 1541 add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" 1542 .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b 1543 
rev64 v23.16b,v23.16b 1544 add v2.2d,v4.2d,v0.2d // "D + T1" 1545 .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b 1546 add v0.2d,v0.2d,v26.2d // accumulate 1547 add v1.2d,v1.2d,v27.2d 1548 add v2.2d,v2.2d,v28.2d 1549 add v3.2d,v3.2d,v29.2d 1550 1551 cbnz x2,.Loop_hw 1552 1553 st1 {v0.2d-v3.2d},[x0] // store context 1554 1555 ldr x29,[sp],#16 1556 ret 1557.size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8 1558#endif 1559