/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */

#include <machine/asm.h>

/*
 * timingsafe_memcmp -- lexicographic comparison of two byte buffers.
 *
 * In:	x0 = first buffer
 *	x1 = second buffer
 *	x2 = length in bytes
 * Out:	w0 = 0 if the buffers are equal (or the length is 0),
 *	     negative if the first buffer sorts before the second,
 *	     positive if it sorts after.  Bytes compare as unsigned.
 *	     The 1--2 byte path returns the difference of the two 16-bit
 *	     big-endian values; every other path returns exactly -1, 0 or 1.
 * Clobbers: x0--x8, flags.  Leaf routine, no stack use.
 *
 * All branches depend only on the length in x2.  Decisions that depend on
 * buffer contents are made with csel/csetm/csinc, and every load address
 * is derived from the pointers and the length alone.
 *
 * Every size class except "more than 16 bytes" loads one chunk from the
 * start and one chunk that ends at the last byte.  The two chunks may
 * overlap; this is harmless because bytes covered twice appear in the same
 * relative order, so the first differing byte still decides the result.
 */
ENTRY(timingsafe_memcmp)
	cmp	x2, #16			// at least 17 bytes to process?
	bhi	.Lgt16

	cmp	x2, #8			// at least 9 bytes to process?
	bhi	.L0916

	cmp	x2, #4			// at least 5 bytes to process?
	bhi	.L0508

	cmp	x2, #2			// at least 3 bytes to process?
	bhi	.L0304

	cbnz	x2, .L0102		// buffer not empty?

	mov	w0, #0			// empty buffer always matches
	ret

	/* 1--2 bytes: build a 16 bit big endian value from first and last byte */
.L0102:	ldrb	w3, [x0]		// load first bytes
	ldrb	w4, [x1]
	sub	x2, x2, #1		// x2 = index of last byte
	ldrb	w5, [x0, x2]		// load last bytes
	ldrb	w6, [x1, x2]
	bfi	w5, w3, #8, #8		// w5 = first << 8 | last
	bfi	w6, w4, #8, #8		// w6 = first << 8 | last
	sub	w0, w5, w6		// both values < 65536: no overflow
	ret

	/* 3--4 bytes: first and last halfword joined into one word */
.L0304:	ldrh	w3, [x0]		// load first halfwords
	ldrh	w4, [x1]
	sub	x2, x2, #2		// x2 = offset of last halfword
	ldrh	w5, [x0, x2]		// load last halfwords
	ldrh	w6, [x1, x2]
	bfi	w3, w5, #16, #16	// join halfwords in little endian
	bfi	w4, w6, #16, #16
	rev	w3, w3			// swap byte order so that the first
	rev	w4, w4			// byte is the most significant one
	cmp	w3, w4
	csetm	w0, lo			// w0 = w3 < w4 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = w3 > w4 ? 1 : w0
	ret

	/* 5--8 bytes: first and last word joined into one doubleword */
.L0508:	ldr	w3, [x0]		// load first words
	ldr	w4, [x1]
	sub	x2, x2, #4		// x2 = offset of last word
	ldr	w5, [x0, x2]		// load last words
	ldr	w6, [x1, x2]
	bfi	x3, x5, #32, #32	// join words in little endian
	bfi	x4, x6, #32, #32
	rev	x3, x3			// swap byte order so that the first
	rev	x4, x4			// byte is the most significant one
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 < x4 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
	ret

	/* 9--16 bytes: compare first doubleword, then last doubleword */
.L0916:	ldr	x3, [x0]		// load first doublewords
	ldr	x4, [x1]
	sub	x2, x2, #8		// x2 = offset of last doubleword
	ldr	x5, [x0, x2]		// load last doublewords
	ldr	x6, [x1, x2]
	cmp	x3, x4			// mismatch in first pair?
	csel	x3, x3, x5, ne		// use second pair if first pair equal
	csel	x4, x4, x6, ne
	rev	x3, x3			// swap byte order so that the first
	rev	x4, x4			// byte is the most significant one
	cmp	x3, x4
	csetm	w0, lo			// w0 = x3 < x4 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = x3 > x4 ? 1 : w0
	ret

	/*
	 * More than 16 bytes: process buffer in a loop.
	 *
	 * x3/x5 hold the earliest mismatching doubleword pair seen so far,
	 * or the most recently loaded equal pair if there was no mismatch.
	 * Once x3 != x5 the pair is never replaced again.
	 */
.Lgt16:	ldp	x3, x4, [x0], #16
	ldp	x5, x6, [x1], #16
	cmp	x3, x5			// mismatch in first pair?
	csel	x3, x3, x4, ne		// use second pair if first pair equal
	csel	x5, x5, x6, ne
	subs	x2, x2, #32		// x2 = bytes left - 16
	bls	.Ltail			// at most 16 bytes left?

0:	ldp	x4, x7, [x0], #16
	ldp	x6, x8, [x1], #16
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	subs	x2, x2, #16		// x2 = bytes left - 16
	bhi	0b			// more than 16 bytes left?

	/*
	 * 1--16 bytes left and x2 = bytes left - 16 <= 0.  Step the
	 * pointers back so the final 16 byte load ends at the last byte.
	 */
.Ltail:	add	x0, x0, x2
	add	x1, x1, x2
	ldp	x4, x7, [x0]
	ldp	x6, x8, [x1]
	cmp	x4, x6			// mismatch in first pair?
	csel	x4, x4, x7, ne		// if not, try second pair
	csel	x6, x6, x8, ne
	cmp	x3, x5			// was there a mismatch previously?
	csel	x3, x3, x4, ne		// apply new pair if there was not
	csel	x5, x5, x6, ne
	rev	x3, x3			// swap byte order so that the first
	rev	x5, x5			// byte is the most significant one
	cmp	x3, x5
	csetm	w0, lo			// w0 = x3 < x5 ? -1 : 0
	csinc	w0, w0, wzr, ls		// w0 = x3 > x5 ? 1 : w0
	ret
END(timingsafe_memcmp)