/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Robert Clausecker
 */

#include <machine/asm.h>

/*
 * timingsafe_bcmp: report whether two equally long buffers differ.
 *
 * In:    x0 = first buffer, x1 = second buffer, x2 = length in bytes
 * Out:   w0 = 0 if all bytes are equal, some nonzero value otherwise
 * Uses:  x0-x6, v0-v4, condition flags
 *
 * Every branch below tests the length only, never the buffer contents.
 *
 * Lengths of 1 to 32 bytes are handled by loading one chunk from the
 * start and one chunk from the end of each buffer; the two chunks
 * overlap unless the length is exactly twice the chunk size.  Longer
 * buffers are consumed in 32 byte blocks from the front, followed by
 * one final 32 byte block that ends at the last byte and may overlap
 * bytes that were already compared.
 */
ENTRY(timingsafe_bcmp)
	/* pick a size class, largest first */
	cmp	x2, #32
	bhi	.Lbulk			// 33 bytes or more
	cmp	x2, #16
	bhi	.Llen17to32
	cmp	x2, #8
	bhi	.Llen9to16
	cmp	x2, #4
	bhi	.Llen5to8
	cmp	x2, #2
	bhi	.Llen3to4
	cbz	x2, .Lempty

	/* 1 or 2 bytes: first byte and last byte */
	sub	x2, x2, #1		// x2 = offset of the last byte
	ldrb	w3, [x0]
	ldrb	w4, [x1]
	ldrb	w5, [x0, x2]
	ldrb	w6, [x1, x2]
	eor	w3, w3, w4		// nonzero iff the first bytes differ
	eor	w5, w5, w6		// nonzero iff the last bytes differ
	orr	w0, w3, w5
	ret

.Lempty:
	mov	w0, wzr			// nothing to compare: report a match
	ret

	/* 3 or 4 bytes: first halfword and last halfword */
.Llen3to4:
	sub	x2, x2, #2		// x2 = offset of the last halfword
	ldrh	w3, [x0]
	ldrh	w4, [x1]
	ldrh	w5, [x0, x2]
	ldrh	w6, [x1, x2]
	eor	w3, w3, w4
	eor	w5, w5, w6
	orr	w0, w3, w5
	ret

	/* 5 to 8 bytes: first word and last word */
.Llen5to8:
	sub	x2, x2, #4		// x2 = offset of the last word
	ldr	w3, [x0]
	ldr	w4, [x1]
	ldr	w5, [x0, x2]
	ldr	w6, [x1, x2]
	eor	w3, w3, w4
	eor	w5, w5, w6
	orr	w0, w3, w5
	ret

	/* 9 to 16 bytes: first doubleword and last doubleword */
.Llen9to16:
	sub	x2, x2, #8		// x2 = offset of the last doubleword
	ldr	x3, [x0]
	ldr	x4, [x1]
	ldr	x5, [x0, x2]
	ldr	x6, [x1, x2]
	eor	x3, x3, x4
	eor	x5, x5, x6
	orr	x0, x3, x5
	/*
	 * Only w0 carries the result, so fold the upper 32 bits into
	 * the lower 32 bits: w0 is then nonzero iff any bit differed.
	 */
	orr	x0, x0, x0, lsr #32
	ret

	/* 17 to 32 bytes: first 16 bytes and last 16 bytes */
.Llen17to32:
	sub	x2, x2, #16		// x2 = offset of the last 16 bytes
	ldr	q0, [x0]
	ldr	q1, [x1]
	ldr	q2, [x0, x2]
	ldr	q3, [x1, x2]
	eor	v0.16b, v0.16b, v1.16b
	eor	v2.16b, v2.16b, v3.16b
	orr	v0.16b, v0.16b, v2.16b
	umaxv	s0, v0.4s		// largest word: zero iff all four are zero
	mov	w0, v0.s[0]
	ret

	/*
	 * 33 bytes or more.  v4 collects the differences of all blocks
	 * seen so far.  After the first block x2 holds the number of
	 * bytes not yet consumed minus 32.
	 */
.Lbulk:
	ldp	q0, q1, [x0], #32	// first block, advance both pointers
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v4.16b, v0.16b, v1.16b
	subs	x2, x2, #64		// x2 = length - 64
	bls	.Lfinal			// at most 64 bytes: go to the last block

.Lloop:
	ldp	q0, q1, [x0], #32
	ldp	q2, q3, [x1], #32
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v4.16b, v4.16b, v0.16b
	subs	x2, x2, #32
	bhi	.Lloop			// repeat while more than 32 bytes remain

	/*
	 * Between 1 and 32 bytes remain, so x2 is in the range -31..0.
	 * Adding it moves both pointers back so that the final 32 byte
	 * block ends exactly at the last byte of each buffer.
	 */
.Lfinal:
	add	x0, x0, x2
	add	x1, x1, x2
	ldp	q0, q1, [x0]
	ldp	q2, q3, [x1]
	eor	v0.16b, v0.16b, v2.16b
	eor	v1.16b, v1.16b, v3.16b
	orr	v0.16b, v0.16b, v1.16b
	orr	v4.16b, v4.16b, v0.16b
	umaxv	s0, v4.4s		// largest word: zero iff all four are zero
	mov	w0, v0.s[0]
	ret
END(timingsafe_bcmp)