/*-
 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <sys/elf_common.h>
#include <machine/asm.h>

/*
 * addkm key, m -- leave (key + m) mod 2^32 in the register named k.
 *
 * key is an assemble-time constant, m is a 32-bit register.  The macro
 * picks one of three encodings depending on how large the negated
 * constant (2^32 - key) is:
 *
 *   larger than 24 bits:  build key in k with MOVZ/MOVK, then add m
 *   17 to 24 bits:        subtract (2^32 - key) from m as two immediates,
 *                         bits 12..23 first, then bits 0..11
 *   16 bits or fewer:     MOVZ (2^32 - key) into k, then k = m - k
 *
 * Subtracting (2^32 - key) in a 32-bit register gives the same result as
 * adding key.  Only k is written; m is left unchanged.
 */
# optimal instruction sequence for k = \key + \m
.macro	addkm	key, m
.if 0x100000000 - \key > 0x00ffffff
	movz	k, #\key & 0xffff
	movk	k, #\key >> 16, lsl #16
	add	k, k, \m
.elseif 0x100000000 - \key > 0x0000ffff
	sub	k, \m, #(0x100000000 - \key) & 0xfff000
	sub	k, k, #(0x100000000 - \key) & 0xfff
.else
	movz	k, #0x100000000 - \key
	sub	k, \m, k
.endif
.endm

/*
 * round a, b, c, d, f, key, m, s -- one generic MD5 step.
 *
 * Expands the boolean-function macro named by the f argument (f0, f2 or
 * f3) on b, c, d with the result in register f, then computes
 *
 *   a = b + rotl32(a + f + key + m, s)
 *
 * The left rotation by s is encoded as a right rotation by 32 - s.
 * Writes a, f and k; b, c, d and m are only read.
 */
.macro	round	a, b, c, d, f, key, m, s
	\f	f, \b, \c, \d
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	add	\a, \a, f		// k[i] + m[g] + a + f
	ror	\a, \a, #32-\s		// rotate left by s
	add	\a, \a, \b
.endm

	/*
	 * f = b ? c : d
	 * computed as ((c ^ d) & b) ^ d
	 */
.macro	f0	f, b, c, d
	eor	\f, \c, \d
	and	\f, \f, \b
	eor	\f, \f, \d
.endm

	/*
	 * special cased round 1 function
	 * f1 = d ? b : c = (d & b) + (~d & c)
	 *
	 * The two terms never have a bit in common, so they can be added
	 * into a separately instead of being combined first.
	 * Writes a, tmp, f and k.
	 */
.macro	round1	a, b, c, d, key, m, s
	bic	tmp, \c, \d		// ~d & c
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	and	f, \b, \d		// d & b
	add	\a, \a, tmp		// k[i] + m[g] + a + (~d & c)
	add	\a, \a, f		// k[i] + m[g] + a + (~d & c) + (d & b)
	ror	\a, \a, #32-\s		// rotate left by s
	add	\a, \a, \b
.endm

	/* f = b ^ c ^ d */
.macro	f2	f, b, c, d
	eor	\f, \c, \d
	eor	\f, \f, \b
.endm

	/* f = c ^ (b | ~d) */
.macro	f3	f, b, c, d
	orn	\f, \b, \d
	eor	\f, \f, \c
.endm

	/*
	 * do 4 rounds
	 *
	 * The state registers rotate through the roles a, d, c, b so that
	 * after four steps every register is back in its original role.
	 */
.macro	rounds	f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
	round	a, b, c, d, \f, \k0, \m0, \s0
	round	d, a, b, c, \f, \k1, \m1, \s1
	round	c, d, a, b, \f, \k2, \m2, \s2
	round	b, c, d, a, \f, \k3, \m3, \s3
.endm

	/* do 4 rounds with f0, f1, f2, f3 */
	/* each wrapper supplies the four rotate amounts of its group */
.macro	rounds0	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
.endm

	/* uses the special cased round1 instead of the generic round */
.macro	rounds1	m0, m1, m2, m3, k0, k1, k2, k3
	round1	a, b, c, d, \k0, \m0, 5
	round1	d, a, b, c, \k1, \m1, 9
	round1	c, d, a, b, \k2, \m2, 14
	round1	b, c, d, a, \k3, \m3, 20
.endm

.macro	rounds2	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
.endm

.macro	rounds3	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
.endm

	/*
	 * md5block(MD5_CTX, buf, len)
	 *
	 * For every whole 64-byte block in buf, runs the 64 MD5 steps on
	 * the four 32-bit state words and adds the result back into the
	 * words stored at ctx + 0 .. ctx + 15.
	 *
	 * In:       x0 = ctx, x1 = buf, x2 = len in bytes (the low six
	 *           bits are discarded, so a trailing partial block is
	 *           not processed)
	 * Writes:   w3 - w17 and the condition flags; x19 - x26 are
	 *           saved on entry and restored before returning
	 * Stack:    0x40 bytes, released before ret
	 *
	 * NOTE(review): presumably the state words are the first member
	 * of MD5_CTX -- confirm against the structure definition.
	 * NOTE(review): message words are used exactly as loaded, with
	 * no byte swap; this looks like it assumes a little-endian
	 * target -- confirm.
	 */
ENTRY(_libmd_md5block)
	/* arguments */
ctx	.req	x0
buf	.req	x1
len	.req	x2
end	.req	x2			// aliases len
	/* working state, boolean function result, scratch, constant sum */
a	.req	w3
b	.req	w4
c	.req	w5
d	.req	w6
f	.req	w7
tmp	.req	w8
k	.req	w9
	/* the sixteen message words of the current block */
m0	.req	w10
m1	.req	w11
m2	.req	w12
m3	.req	w13
m4	.req	w14
m5	.req	w15
m6	.req	w16
m7	.req	w17
	// x18 is the platform register
m8	.req	w19
m9	.req	w20
m10	.req	w21
m11	.req	w22
m12	.req	w23
m13	.req	w24
m14	.req	w25
m15	.req	w26

	/*
	 * Previous state words, reloaded from ctx after the last step.
	 * They share registers with message words, which are loaded
	 * again at the top of the loop before their next use.
	 */
a_	.req	m0
b_	.req	m7
c_	.req	m14
d_	.req	m5

	/* m8 - m15 live in x19 - x26, which must be preserved */
	stp	x19, x20, [sp, #-0x40]!
	stp	x21, x22, [sp, #0x10]
	stp	x23, x24, [sp, #0x20]
	stp	x25, x26, [sp, #0x30]

	bics	len, len, #63		// byte length rounded down to whole blocks
	add	end, buf, len		// end pointer (ADD leaves the flags alone)

	beq	.Lend			// was len == 0 after BICS?

	ldp	a, b, [ctx, #0]
	ldp	c, d, [ctx, #8]

	/* first eight rounds interleaved with data loads */
.Lloop:	ldp	m0, m1, [buf, #0]
	round	a, b, c, d, f0, 0xd76aa478, m0, 7
	ldp	m2, m3, [buf, #8]
	round	d, a, b, c, f0, 0xe8c7b756, m1, 12
	ldp	m4, m5, [buf, #16]
	round	c, d, a, b, f0, 0x242070db, m2, 17
	ldp	m6, m7, [buf, #24]
	round	b, c, d, a, f0, 0xc1bdceee, m3, 22

	ldp	m8, m9, [buf, #32]
	round	a, b, c, d, f0, 0xf57c0faf, m4, 7
	ldp	m10, m11, [buf, #40]
	round	d, a, b, c, f0, 0x4787c62a, m5, 12
	ldp	m12, m13, [buf, #48]
	round	c, d, a, b, f0, 0xa8304613, m6, 17
	ldp	m14, m15, [buf, #56]
	round	b, c, d, a, f0, 0xfd469501, m7, 22

	/* remaining rounds use the roundsX macros */
	/* steps 8 - 15, still with f0 */
	rounds0	m8, m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
	rounds0	m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821

	/* steps 16 - 31, message index advances by 5 each step */
	rounds1	m1, m6, m11, m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
	rounds1	m5, m10, m15, m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
	rounds1	m9, m14, m3, m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
	rounds1	m13, m2, m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a

	/* steps 32 - 47, message index advances by 3 each step */
	rounds2	m5, m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
	rounds2	m1, m4, m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
	rounds2	m13, m0, m3, m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
	rounds2	m9, m12, m15, m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665

	/* steps 48 - 63, message index advances by 7 each step */
	rounds3	m0, m7, m14, m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
	rounds3	m12, m3, m10, m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
	rounds3	m8, m15, m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
	rounds3	m4, m11, m2, m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391

	/* add the state from before this block and write the sum back */
	ldp	a_, b_, [ctx, #0]
	ldp	c_, d_, [ctx, #8]
	add	a, a, a_
	add	b, b, b_
	add	c, c, c_
	add	d, d, d_
	stp	a, b, [ctx, #0]
	stp	c, d, [ctx, #8]

	add	buf, buf, #64		// advance to the next block
	cmp	buf, end
	bne	.Lloop

	/* restore the saved registers and release the frame */
.Lend:	ldp	x25, x26, [sp, #0x30]
	ldp	x23, x24, [sp, #0x20]
	ldp	x21, x22, [sp, #0x10]
	ldp	x19, x20, [sp], #0x40

	ret
END(_libmd_md5block)

GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)

	.section .note.GNU-stack,"",%progbits