/*-
 * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <sys/elf_common.h>
#include <machine/asm.h>

/*
 * addkm key, m -- k = \key + \m (mod 2^32)
 *
 * Picks an instruction sequence based on the magnitude of the negated
 * constant 2^32 - \key:
 *
 *   more than 24 bits: build \key in k with MOVZ/MOVK, then add \m
 *   17 to 24 bits:     subtract 2^32 - \key from \m as two 12 bit
 *                      immediates (bits 23:12, then bits 11:0)
 *   at most 16 bits:   load 2^32 - \key with a single MOVZ and
 *                      subtract it from \m
 *
 * Subtracting 2^32 - \key is the same as adding \key in 32 bit
 * arithmetic.  Writes k only; \m is left untouched.
 */
.macro	addkm	key, m
.if 0x100000000 - \key > 0x00ffffff
	movz	k, #\key & 0xffff
	movk	k, #\key >> 16, lsl #16
	add	k, k, \m
.elseif 0x100000000 - \key > 0x0000ffff
	sub	k, \m, #(0x100000000 - \key) & 0xfff000
	sub	k, k, #(0x100000000 - \key) & 0xfff
.else
	movz	k, #0x100000000 - \key
	sub	k, \m, k
.endif
.endm

/*
 * round a, b, c, d, f, key, m, s -- one MD5 step
 *
 *   \a = \b + rol(\a + \f(\b, \c, \d) + \key + \m, \s)
 *
 * \f names one of the macros f0, f2, f3, which leaves its result in the
 * register f.  The left rotate by \s is done as a right rotate by
 * 32 - \s.  Clobbers f and k.
 */
.macro	round	a, b, c, d, f, key, m, s
	\f	f, \b, \c, \d
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	add	\a, \a, f		// k[i] + m[g] + a + f
	ror	\a, \a, #32-\s		// rotate left by \s
	add	\a, \a, \b
.endm

	/* f = b ? c : d, computed as ((c ^ d) & b) ^ d */
.macro	f0	f, b, c, d
	eor	\f, \c, \d
	and	\f, \f, \b
	eor	\f, \f, \d
.endm

	/*
	 * special cased round 1 function
	 * f1 = d ? b : c = (d & b) + (~d & c)
	 *
	 * The two terms never have a set bit in common, so they are added
	 * into \a one after the other instead of being merged into a
	 * single f value first.  Clobbers tmp, f and k.
	 */
.macro	round1	a, b, c, d, key, m, s
	bic	tmp, \c, \d		// ~d & c
	addkm	\key, \m		// k[i] + m[g]
	add	\a, \a, k		// k[i] + m[g] + a
	and	f, \b, \d		// d & b
	add	\a, \a, tmp		// k[i] + m[g] + a + (~d & c)
	add	\a, \a, f		// k[i] + m[g] + a + (~d & c) + (d & b)
	ror	\a, \a, #32-\s		// rotate left by \s
	add	\a, \a, \b
.endm

	/* f = b ^ c ^ d */
.macro	f2	f, b, c, d
	eor	\f, \c, \d
	eor	\f, \f, \b
.endm

	/* f = c ^ (b | ~d) */
.macro	f3	f, b, c, d
	orn	\f, \b, \d
	eor	\f, \f, \c
.endm

	/*
	 * do 4 rounds: the roles of a, b, c, d rotate by one position per
	 * step, so after four steps every register is back in its
	 * original role
	 */
.macro	rounds	f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
	round	a, b, c, d, \f, \k0, \m0, \s0
	round	d, a, b, c, \f, \k1, \m1, \s1
	round	c, d, a, b, \f, \k2, \m2, \s2
	round	b, c, d, a, \f, \k3, \m3, \s3
.endm

	/*
	 * do 4 rounds with f0, f1, f2, f3: each roundsN macro supplies
	 * the shift amounts that go with its round function
	 */
.macro	rounds0	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
.endm

.macro	rounds1	m0, m1, m2, m3, k0, k1, k2, k3
	round1	a, b, c, d, \k0, \m0,  5
	round1	d, a, b, c, \k1, \m1,  9
	round1	c, d, a, b, \k2, \m2, 14
	round1	b, c, d, a, \k3, \m3, 20
.endm

.macro	rounds2	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
.endm

.macro	rounds3	m0, m1, m2, m3, k0, k1, k2, k3
	rounds	f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
.endm

	/*
	 * md5block(MD5_CTX, buf, len)
	 *
	 * In:	x0 = ctx, the four 32 bit state words are read from and
	 *	     written back to offsets 0, 4, 8, 12
	 *	x1 = buf, input data
	 *	x2 = len, input length in bytes; rounded down to a
	 *	     multiple of 64, trailing bytes are ignored
	 *
	 * Processes the input 64 bytes at a time and stores the updated
	 * state to ctx after every block.  If fewer than 64 bytes are
	 * given, ctx is neither read nor written.
	 *
	 * Stack: 0x40 bytes holding the callee-saved x19-x26.
	 * Clobbers: x1, x2, w3-w17, condition flags.  x0 is never written.
	 */
ENTRY(_libmd_md5block)
ctx	.req	x0
buf	.req	x1
len	.req	x2
end	.req	x2			// aliases len
a	.req	w3
b	.req	w4
c	.req	w5
d	.req	w6
f	.req	w7
tmp	.req	w8
k	.req	w9
m0	.req	w10
m1	.req	w11
m2	.req	w12
m3	.req	w13
m4	.req	w14
m5	.req	w15
m6	.req	w16
m7	.req	w17
	// x18 is the platform register
m8	.req	w19
m9	.req	w20
m10	.req	w21
m11	.req	w22
m12	.req	w23
m13	.req	w24
m14	.req	w25
m15	.req	w26

	/*
	 * The previous state is reloaded from ctx into message registers
	 * at the end of each block.  None of m0, m7, m14, m5 is an operand
	 * of the final rounds3 group (m4, m11, m2, m9).
	 */
a_	.req	m0
b_	.req	m7
c_	.req	m14
d_	.req	m5

	stp	x19, x20, [sp, #-0x40]!
	stp	x21, x22, [sp, #0x10]
	stp	x23, x24, [sp, #0x20]
	stp	x25, x26, [sp, #0x30]

	bics	len, len, #63		// byte length of all whole blocks
	add	end, buf, len		// end pointer (ADD leaves the flags alone)

	beq	.Lend			// was len == 0 after BICS?

	ldp	a, b, [ctx, #0]
	ldp	c, d, [ctx, #8]

	/* first eight rounds interleaved with data loads */
.Lloop:	ldp	m0, m1, [buf, #0]
	round	a, b, c, d, f0, 0xd76aa478, m0, 7
	ldp	m2, m3, [buf, #8]
	round	d, a, b, c, f0, 0xe8c7b756, m1, 12
	ldp	m4, m5, [buf, #16]
	round	c, d, a, b, f0, 0x242070db, m2, 17
	ldp	m6, m7, [buf, #24]
	round	b, c, d, a, f0, 0xc1bdceee, m3, 22

	ldp	m8, m9, [buf, #32]
	round	a, b, c, d, f0, 0xf57c0faf, m4, 7
	ldp	m10, m11, [buf, #40]
	round	d, a, b, c, f0, 0x4787c62a, m5, 12
	ldp	m12, m13, [buf, #48]
	round	c, d, a, b, f0, 0xa8304613, m6, 17
	ldp	m14, m15, [buf, #56]
	round	b, c, d, a, f0, 0xfd469501, m7, 22

	/* remaining rounds use the roundsX macros */
	rounds0	m8, m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
	rounds0	m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821

	rounds1	m1, m6, m11, m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
	rounds1	m5, m10, m15, m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
	rounds1	m9, m14, m3, m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
	rounds1	m13, m2, m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a

	rounds2	m5, m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
	rounds2	m1, m4, m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
	rounds2	m13, m0, m3, m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
	rounds2	m9, m12, m15, m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665

	rounds3	m0, m7, m14, m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
	rounds3	m12, m3, m10, m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
	rounds3	m8, m15, m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
	rounds3	m4, m11, m2, m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391

	/* add the state from before this block and write the sum back */
	ldp	a_, b_, [ctx, #0]
	ldp	c_, d_, [ctx, #8]
	add	a, a, a_
	add	b, b, b_
	add	c, c, c_
	add	d, d, d_
	stp	a, b, [ctx, #0]
	stp	c, d, [ctx, #8]

	add	buf, buf, #64		// advance to the next block
	cmp	buf, end
	bne	.Lloop

.Lend:	ldp	x25, x26, [sp, #0x30]
	ldp	x23, x24, [sp, #0x20]
	ldp	x21, x22, [sp, #0x10]
	ldp	x19, x20, [sp], #0x40

	ret
END(_libmd_md5block)

GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)

	.section .note.GNU-stack,"",%progbits