/*-
 * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * sha1block_sha1 implementation based on sha1-arm.c,
 * written and placed in public domain by Jeffrey Walton
 * based on code from ARM, and by Johannes Schneiders, Skip
 * Hovsmith and Barry O'Rourke for the mbedTLS project.
 */

#include <machine/asm.h>

/*
 * Scalar SHA1 implementation.
 *
 * Due to the ample register file available on AArch64, the w array is
 * kept entirely in registers.  The saved a--e variables are instead kept
 * in memory as we don't have that many registers.
 *
 * Arguments:
 *	x0 (ctx)  pointer to the SHA1 state; the five 32 bit state words
 *		  a--e are read from and written back to offsets 0--16
 *	x1 (buf)  pointer to the input data, advanced by 64 per block
 *	x2 (len)  input length in bytes; rounded down to a multiple of 64,
 *		  the function returns without touching ctx if the rounded
 *		  length is zero
 *
 * Register use:
 *	w3--w7    working variables a--e
 *	w8        current round constant k
 *	w9        output f of the current round function
 *	w10       scratch register tmp
 *	w11--w17, w19--w27
 *		  the sixteen live message schedule words w_0--w_15
 *
 * x19--x27 are callee-saved and hence spilled to the stack frame.
 * The condition flags are clobbered.
 *
 * Stack frame (24 + 9*8 = 96 bytes, keeps sp 16 byte aligned):
 *	sp +  0 .. sp + 19	state a--e as of the start of the block
 *	sp + 20 .. sp + 23	padding
 *	sp + 24 .. sp + 95	saved x19--x27
 */

	// sha1block(SHA1_CTX, buf, len)
ENTRY(_libmd_sha1block_scalar)
ctx	.req	x0
buf	.req	x1
len	.req	x2
w	.req	sp
a	.req	w3
b	.req	w4
c	.req	w5
d	.req	w6
e	.req	w7
k	.req	w8
f	.req	w9
tmp	.req	w10
w_0	.req	w11
w_1	.req	w12
w_2	.req	w13
w_3	.req	w14
w_4	.req	w15
w_5	.req	w16
w_6	.req	w17
// w18 is the platform register
w_7	.req	w19
w_8	.req	w20
w_9	.req	w21
w_10	.req	w22
w_11	.req	w23
w_12	.req	w24
w_13	.req	w25
w_14	.req	w26
w_15	.req	w27

	/*
	 * Message schedule step.  On entry \w_i holds w[i-16]; it is
	 * replaced in place by w[i].  Clobbers tmp.
	 */
.macro	shuffle	w_i, w_i3, w_i8, w_i14
	eor	\w_i, \w_i, \w_i3
	eor	tmp, \w_i8, \w_i14
	eor	\w_i, \w_i, tmp			// w[i-16] ^ w[i-14] ^ w[i-8] ^ w[i-3]
	ror	\w_i, \w_i, #31			// w[i] = ... ror #31
.endm

	/* f = (b & c) | (~b & d).  Clobbers tmp. */
.macro	func1	a, b, c, d, e
	and	f, \c, \b
	bic	tmp, \d, \b
	orr	f, f, tmp
.endm

	/* f = b ^ c ^ d */
.macro	func2	a, b, c, d, e
	eor	f, \b, \c
	eor	f, f, \d
.endm

	/* f = (b & c) | ((b ^ c) & d).  Clobbers tmp. */
.macro	func3	a, b, c, d, e
	eor	tmp, \b, \c
	and	f, \b, \c
	and	tmp, tmp, \d
	orr	f, f, tmp
.endm

	/* the fourth round function is the same as the second one */
.macro	func4	a, b, c, d, e
	func2	\a, \b, \c, \d, \e
.endm

	/*
	 * Round update: b = b ror 2 and e += (a ror 27) + f + k + w[i].
	 * f must already hold the round function output, computed from
	 * the value of b before the rotation.  The working variables are
	 * never moved between registers; instead each invocation passes
	 * them in rotated order.  Clobbers tmp.
	 */
.macro	mix	a, b, c, d, e, w_i
	ror	\b, \b, #2
	ror	tmp, \a, #27
	add	\e, \e, \w_i
	add	tmp, tmp, k
	add	\e, \e, f
	add	\e, \e, tmp			// (a ror 27) + e + f + k + w[i]
.endm

	/*
	 * Rounds 0--15: the message word was just loaded from buf and
	 * only needs its byte order reversed (SHA1 words are big endian).
	 */
.macro	round1	a, b, c, d, e, w_i
	func1	\a, \b, \c, \d, \e
	rev	\w_i, \w_i
	mix	\a, \b, \c, \d, \e, \w_i
.endm

	/* Generic round 16--79: extend the schedule, apply \func, mix. */
.macro	round	func, a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	shuffle	\w_i, \w_i3, \w_i8, \w_i14
	\func	\a, \b, \c, \d, \e
	mix	\a, \b, \c, \d, \e, \w_i
.endm

	/* rounds 16--19: first round function with schedule extension */
.macro	round1x	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	round	func1, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
.endm

	/* rounds 20--39 */
.macro	round2	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	round	func2, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
.endm

	/* rounds 40--59 */
.macro	round3	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	round	func3, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
.endm

	/* rounds 60--79 */
.macro	round4	a, b, c, d, e, w_i, w_i3, w_i8, w_i14
	round	func4, \a, \b, \c, \d, \e, \w_i, \w_i3, \w_i8, \w_i14
.endm

	ands	len, len, #~63			// take length in multiples of block length
	beq	1f				// bail out if input empty

	sub	sp, sp, #24+9*8			// allocate stack space
	str	x19, [sp, #24+0*8]
	stp	x20, x21, [sp, #24+1*8]
	stp	x22, x23, [sp, #24+3*8]
	stp	x24, x25, [sp, #24+5*8]
	stp	x26, x27, [sp, #24+7*8]

	ldp	a, b, [ctx, #0]			// load SHA1 state from context
	ldp	c, d, [ctx, #8]
	ldr	e, [ctx, #16]

	/* main loop: one 64 byte block per iteration */
0:	stp	a, b, [sp, #0]			// save old SHA1 state
	stp	c, d, [sp, #8]
	str	e, [sp, #16]

	movz	k, #0x7999			// round constant 1
	movk	k, #0x5a82, lsl #16

	/* rounds 0--15, message words loaded pairwise from buf */
	ldp	w_0, w_1, [buf, #0*4]
	round1	a, b, c, d, e, w_0
	round1	e, a, b, c, d, w_1

	ldp	w_2, w_3, [buf, #2*4]
	round1	d, e, a, b, c, w_2
	round1	c, d, e, a, b, w_3

	ldp	w_4, w_5, [buf, #4*4]
	round1	b, c, d, e, a, w_4
	round1	a, b, c, d, e, w_5

	ldp	w_6, w_7, [buf, #6*4]
	round1	e, a, b, c, d, w_6
	round1	d, e, a, b, c, w_7

	ldp	w_8, w_9, [buf, #8*4]
	round1	c, d, e, a, b, w_8
	round1	b, c, d, e, a, w_9

	ldp	w_10, w_11, [buf, #10*4]
	round1	a, b, c, d, e, w_10
	round1	e, a, b, c, d, w_11

	ldp	w_12, w_13, [buf, #12*4]
	round1	d, e, a, b, c, w_12
	round1	c, d, e, a, b, w_13

	ldp	w_14, w_15, [buf, #14*4]
	round1	b, c, d, e, a, w_14
	round1	a, b, c, d, e, w_15

	/* rounds 16--19 */
	round1x	e, a, b, c, d, w_0, w_13, w_8, w_2
	round1x	d, e, a, b, c, w_1, w_14, w_9, w_3
	round1x	c, d, e, a, b, w_2, w_15, w_10, w_4
	round1x	b, c, d, e, a, w_3, w_0, w_11, w_5

	movz	k, #0xeba1			// round constant 2
	movk	k, #0x6ed9, lsl #16

	/* rounds 20--39 */
	round2	a, b, c, d, e, w_4, w_1, w_12, w_6
	round2	e, a, b, c, d, w_5, w_2, w_13, w_7
	round2	d, e, a, b, c, w_6, w_3, w_14, w_8
	round2	c, d, e, a, b, w_7, w_4, w_15, w_9
	round2	b, c, d, e, a, w_8, w_5, w_0, w_10

	round2	a, b, c, d, e, w_9, w_6, w_1, w_11
	round2	e, a, b, c, d, w_10, w_7, w_2, w_12
	round2	d, e, a, b, c, w_11, w_8, w_3, w_13
	round2	c, d, e, a, b, w_12, w_9, w_4, w_14
	round2	b, c, d, e, a, w_13, w_10, w_5, w_15

	round2	a, b, c, d, e, w_14, w_11, w_6, w_0
	round2	e, a, b, c, d, w_15, w_12, w_7, w_1
	round2	d, e, a, b, c, w_0, w_13, w_8, w_2
	round2	c, d, e, a, b, w_1, w_14, w_9, w_3
	round2	b, c, d, e, a, w_2, w_15, w_10, w_4

	round2	a, b, c, d, e, w_3, w_0, w_11, w_5
	round2	e, a, b, c, d, w_4, w_1, w_12, w_6
	round2	d, e, a, b, c, w_5, w_2, w_13, w_7
	round2	c, d, e, a, b, w_6, w_3, w_14, w_8
	round2	b, c, d, e, a, w_7, w_4, w_15, w_9

	movz	k, #0xbcdc			// round constant 3
	movk	k, #0x8f1b, lsl #16

	/* rounds 40--59 */
	round3	a, b, c, d, e, w_8, w_5, w_0, w_10
	round3	e, a, b, c, d, w_9, w_6, w_1, w_11
	round3	d, e, a, b, c, w_10, w_7, w_2, w_12
	round3	c, d, e, a, b, w_11, w_8, w_3, w_13
	round3	b, c, d, e, a, w_12, w_9, w_4, w_14

	round3	a, b, c, d, e, w_13, w_10, w_5, w_15
	round3	e, a, b, c, d, w_14, w_11, w_6, w_0
	round3	d, e, a, b, c, w_15, w_12, w_7, w_1
	round3	c, d, e, a, b, w_0, w_13, w_8, w_2
	round3	b, c, d, e, a, w_1, w_14, w_9, w_3

	round3	a, b, c, d, e, w_2, w_15, w_10, w_4
	round3	e, a, b, c, d, w_3, w_0, w_11, w_5
	round3	d, e, a, b, c, w_4, w_1, w_12, w_6
	round3	c, d, e, a, b, w_5, w_2, w_13, w_7
	round3	b, c, d, e, a, w_6, w_3, w_14, w_8

	round3	a, b, c, d, e, w_7, w_4, w_15, w_9
	round3	e, a, b, c, d, w_8, w_5, w_0, w_10
	round3	d, e, a, b, c, w_9, w_6, w_1, w_11
	round3	c, d, e, a, b, w_10, w_7, w_2, w_12
	round3	b, c, d, e, a, w_11, w_8, w_3, w_13

	movz	k, #0xc1d6			// round constant 4
	movk	k, #0xca62, lsl #16

	/* rounds 60--79 */
	round4	a, b, c, d, e, w_12, w_9, w_4, w_14
	round4	e, a, b, c, d, w_13, w_10, w_5, w_15
	round4	d, e, a, b, c, w_14, w_11, w_6, w_0
	round4	c, d, e, a, b, w_15, w_12, w_7, w_1
	round4	b, c, d, e, a, w_0, w_13, w_8, w_2

	round4	a, b, c, d, e, w_1, w_14, w_9, w_3
	round4	e, a, b, c, d, w_2, w_15, w_10, w_4
	round4	d, e, a, b, c, w_3, w_0, w_11, w_5
	round4	c, d, e, a, b, w_4, w_1, w_12, w_6
	round4	b, c, d, e, a, w_5, w_2, w_13, w_7

	round4	a, b, c, d, e, w_6, w_3, w_14, w_8
	round4	e, a, b, c, d, w_7, w_4, w_15, w_9
	round4	d, e, a, b, c, w_8, w_5, w_0, w_10
	round4	c, d, e, a, b, w_9, w_6, w_1, w_11
	round4	b, c, d, e, a, w_10, w_7, w_2, w_12

	round4	a, b, c, d, e, w_11, w_8, w_3, w_13
	round4	e, a, b, c, d, w_12, w_9, w_4, w_14
	round4	d, e, a, b, c, w_13, w_10, w_5, w_15
	round4	c, d, e, a, b, w_14, w_11, w_6, w_0
	round4	b, c, d, e, a, w_15, w_12, w_7, w_1

	/* the schedule registers are dead now, reuse them as temporaries */
	ldp	w_0, w_1, [sp, #0]		// reload saved SHA1 state
	ldp	w_2, w_3, [sp, #8]
	ldr	w_4, [sp, #16]

	add	a, a, w_0
	add	b, b, w_1
	add	c, c, w_2
	add	d, d, w_3
	add	e, e, w_4

	add	buf, buf, #64
	subs	len, len, #64
	bhi	0b				// loop while bytes remain

	stp	a, b, [ctx, #0]			// write updated SHA1 state
	stp	c, d, [ctx, #8]
	str	e, [ctx, #16]

	ldr	x19, [sp, #24+0*8]
	ldp	x20, x21, [sp, #24+1*8]
	ldp	x22, x23, [sp, #24+3*8]
	ldp	x24, x25, [sp, #24+5*8]
	ldp	x26, x27, [sp, #24+7*8]
	add	sp, sp, #24+9*8

1:	ret
END(_libmd_sha1block_scalar)

/*
 * SHA1 implementation using the SHA1 instruction set extension.
 *
 * Arguments are the same as for the scalar variant.  Four rounds are
 * processed per sha1c/sha1p/sha1m instruction.  e0 and e1 alternate as
 * the e input: sha1h derives the e value for the following group of
 * four rounds from the current a before abcd is updated.  tmp0 and
 * tmp1 alternate likewise and hold message words with the round
 * constant already added, prepared two groups ahead of their use.
 *
 * Uses x3, v0--v6 and v16--v23 and clobbers the condition flags.
 * No stack space is used.
 */

	.arch_extension sha2

	// sha1block(SHA1_CTX, buf, len)
ENTRY(_libmd_sha1block_sha1)
	/* ctx, buf, len: same as for sha1block_scalar */
kaddr	.req	x3
abcd	.req	v0
abcd_q	.req	q0		// alias for use with scalar instructions
abcd_s	.req	s0
e0	.req	s1
e0_v	.req	v1
e1	.req	s2
abcd_saved .req	v3
e0_saved .req	v4
tmp0	.req	v5
tmp1	.req	v6
msg0	.req	v16
msg1	.req	v17
msg2	.req	v18
msg3	.req	v19
k0	.req	v20
k1	.req	v21
k2	.req	v22
k3	.req	v23

	ands	len, len, #~63			// take length in multiples of block length
	beq	1f				// bail out if input empty

	ldr	abcd_q, [ctx, #0]
	ldr	e0, [ctx, #16]

	/* broadcast each of the four round constants to all lanes of k0--k3 */
	adrp	kaddr, k1234
	add	kaddr, kaddr, #:lo12:k1234
	ld4r	{k0.4s, k1.4s, k2.4s, k3.4s}, [kaddr]

	/* main loop: one 64 byte block per iteration */
0:	mov	abcd_saved.16b, abcd.16b
	mov	e0_saved.16b, e0_v.16b

	/* load the block, advance buf, and reverse the bytes of each word */
	ld1	{msg0.4s, msg1.4s, msg2.4s, msg3.4s}, [buf], #64
	rev32	msg0.16b, msg0.16b
	rev32	msg1.16b, msg1.16b
	rev32	msg2.16b, msg2.16b
	rev32	msg3.16b, msg3.16b

	add	tmp0.4s, msg0.4s, k0.4s
	add	tmp1.4s, msg1.4s, k0.4s

	/* rounds 0--3 */
	sha1h	e1, abcd_s
	sha1c	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k0.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 4--7 */
	sha1h	e0, abcd_s
	sha1c	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k0.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 8--11 */
	sha1h	e1, abcd_s
	sha1c	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k0.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 12--15 */
	sha1h	e0, abcd_s
	sha1c	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k1.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 16--19 */
	sha1h	e1, abcd_s
	sha1c	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k1.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 20--23 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k1.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 24--27 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k1.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 28--31 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k1.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 32--35 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k2.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 36--39 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k2.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 40--43 */
	sha1h	e1, abcd_s
	sha1m	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k2.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 44--47 */
	sha1h	e0, abcd_s
	sha1m	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k2.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 48--51 */
	sha1h	e1, abcd_s
	sha1m	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k2.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 52--55 */
	sha1h	e0, abcd_s
	sha1m	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k3.4s
	sha1su1	msg0.4s, msg3.4s
	sha1su0	msg1.4s, msg2.4s, msg3.4s

	/* rounds 56--59 */
	sha1h	e1, abcd_s
	sha1m	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg0.4s, k3.4s
	sha1su1	msg1.4s, msg0.4s
	sha1su0	msg2.4s, msg3.4s, msg0.4s

	/* rounds 60--63 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg1.4s, k3.4s
	sha1su1	msg2.4s, msg1.4s
	sha1su0	msg3.4s, msg0.4s, msg1.4s

	/* rounds 64--67 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s
	add	tmp0.4s, msg2.4s, k3.4s
	sha1su1	msg3.4s, msg2.4s
	sha1su0	msg0.4s, msg1.4s, msg2.4s

	/* rounds 68--71 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s
	add	tmp1.4s, msg3.4s, k3.4s
	sha1su1	msg0.4s, msg3.4s

	/* rounds 72--75 */
	sha1h	e1, abcd_s
	sha1p	abcd_q, e0, tmp0.4s

	/* rounds 76--79 */
	sha1h	e0, abcd_s
	sha1p	abcd_q, e1, tmp1.4s

	/* add the state from the start of the block to the new state */
	add	e0_v.4s, e0_v.4s, e0_saved.4s
	add	abcd.4s, abcd.4s, abcd_saved.4s

	subs	len, len, #64
	bhi	0b				// loop while bytes remain

	str	abcd_q, [ctx, #0]
	str	e0, [ctx, #16]

1:	ret
END(_libmd_sha1block_sha1)

	/* the four SHA1 round constants, loaded with ld4r above */
	.section .rodata
	.balign	16
k1234:	.4byte	0x5a827999
	.4byte	0x6ed9eba1
	.4byte	0x8f1bbcdc
	.4byte	0xca62c1d6
	.size	k1234, .-k1234

	.section .note.GNU-stack,"",%progbits