/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */

#include <machine/asm.h>

/*
 * timingsafe_bcmp -- compare two buffers without data-dependent control flow.
 *
 * Target: AArch64, GNU as syntax (preprocessed; ENTRY/END come from
 * <machine/asm.h>).
 *
 * In:    x0 = first buffer
 *        x1 = second buffer
 *        x2 = length in bytes (treated as unsigned)
 * Out:   w0 = 0 if the buffers are equal (or the length is 0),
 *             some nonzero value otherwise
 * Clobb: x0-x6, v0-v4, condition flags
 *
 * Every branch below tests only the length in x2, and every load address
 * is formed from x0, x1 and x2 alone; the buffer contents are only ever
 * fed into EOR/ORR/UMAXV.
 *
 * Strategy: the length selects a size class.  Within a class the buffer
 * is covered by one chunk anchored at its start and one chunk anchored at
 * its end; the two chunks may overlap, which is harmless because the
 * per-chunk differences are simply ORed together.
 */
ENTRY(timingsafe_bcmp)
	cmp	x2, #32			// at least 33 bytes to process?
	bhi	.Lgt32

	cmp	x2, #16			// at least 17 bytes to process?
	bhi	.L1732

	cmp	x2, #8			// at least 9 bytes to process?
	bhi	.L0916

	cmp	x2, #4			// at least 5 bytes to process?
	bhi	.L0508

	cmp	x2, #2			// at least 3 bytes to process?
	bhi	.L0304

	cbnz	x2, .L0102		// buffer empty?

	mov	w0, #0			// empty buffer always matches
	ret

	/* 1--2 bytes: first byte and last byte (same byte if length is 1) */
.L0102:	ldrb	w3, [x0]		// load first bytes
	ldrb	w4, [x1]
	sub	x2, x2, #1		// x2 = offset of the last byte
	ldrb	w5, [x0, x2]		// load last bytes
	ldrb	w6, [x1, x2]
	eor	w3, w3, w4		// nonzero iff the first bytes differ
	eor	w5, w5, w6		// nonzero iff the last bytes differ
	orr	w0, w3, w5
	ret

	/* 3--4 bytes: first halfword and last halfword */
.L0304:	ldrh	w3, [x0]		// load first halfwords
	ldrh	w4, [x1]
	sub	x2, x2, #2		// x2 = offset of the last halfword
	ldrh	w5, [x0, x2]		// load last halfwords
	ldrh	w6, [x1, x2]
	eor	w3, w3, w4
	eor	w5, w5, w6
	orr	w0, w3, w5
	ret

	/* 5--8 bytes: first word and last word */
.L0508:	ldr	w3, [x0]		// load first words
	ldr	w4, [x1]
	sub	x2, x2, #4		// x2 = offset of the last word
	ldr	w5, [x0, x2]		// load last words
	ldr	w6, [x1, x2]
	eor	w3, w3, w4
	eor	w5, w5, w6
	orr	w0, w3, w5
	ret

	/* 9--16 bytes: first doubleword and last doubleword */
.L0916:	ldr	x3, [x0]		// load first doublewords
	ldr	x4, [x1]
	sub	x2, x2, #8		// x2 = offset of the last doubleword
	ldr	x5, [x0, x2]		// load last doublewords
	ldr	x6, [x1, x2]
	eor	x3, x3, x4
	eor	x5, x5, x6
	orr	x0, x3, x5
	/*
	 * Only w0 carries the result, so fold the upper half of the 64-bit
	 * difference into the lower half before returning.
	 */
	orr	x0, x0, x0, lsr #32	// ensure low 32 bits are nonzero iff mismatch
	ret

	/* 17--32 bytes: first quadword and last quadword */
.L1732:	ldr	q0, [x0]		// load first quadwords
	ldr	q1, [x1]
	sub	x2, x2, #16		// x2 = offset of the last quadword
	ldr	q2, [x0, x2]		// load last quadwords
	ldr	q3, [x1, x2]
	eor	v0.16b, v0.16b, v1.16b
	eor	v2.16b, v2.16b, v3.16b
	orr	v0.16b, v0.16b, v2.16b
	umaxv	s0, v0.4s		// get a nonzero word if any
	mov	w0, v0.s[0]
	ret

	/*
	 * more than 32 bytes: process buffer in a loop
	 *
	 * v4 accumulates the OR of all differences seen so far.  After the
	 * first 32 byte block, x2 holds the number of bytes still to compare
	 * minus 32, i.e. not counting the final 32 byte block handled at
	 * .Ltail.  x2 is zero or negative once that final block is all that
	 * is left.
	 */
.Lgt32:	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v4.16b, v0.16b, v1.16b
	subs	x2, x2, #64		// enough left for another iteration?
	bls	.Ltail			// no: length was at most 64

0:	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v4.16b, v4.16b, v0.16b
	subs	x2, x2, #32
	bhi	0b			// loop while more than the final block remains

	/*
	 * process last 32 bytes
	 *
	 * Here -32 < x2 <= 0, so adding it steps the pointers back over
	 * bytes that were already compared; the overlap is harmless.
	 */
.Ltail:	add	x0, x0, x2		// point to the last 32 bytes in the buffer
	add	x1, x1, x2
	ldp	q0, q1, [x0]
	ldp	q2, q3, [x1]
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v4.16b, v4.16b, v0.16b
	umaxv	s0, v4.4s		// get a nonzero word if any
	mov	w0, v0.s[0]
	ret
END(timingsafe_bcmp)