/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#define ALIGN_TEXT	.p2align 4,0x90	/* 16-byte alignment, nop filled */

/*
 * int timingsafe_memcmp(const void *rdi, const void *rsi, size_t rdx)
 *
 * Compare the rdx-byte buffers at rdi and rsi as strings of unsigned
 * bytes, the byte at the lowest address being the most significant.
 * The result in eax is zero if the buffers are equal, negative if the
 * first buffer is less than the second, and positive otherwise.
 *
 * Every conditional branch in this function tests only the length
 * (rdx) or the loop counter (rcx).  Buffer contents are only ever fed
 * into cmov, sub, sbb, or, and setnz, never into a branch.
 *
 * The length is dispatched into the size classes 0, 1--2, 3--4, 5--8,
 * 9--16, and 17+ bytes.  Each nonempty class up to 16 bytes loads one
 * fixed-size word from the start of each buffer ("head") and one
 * ending at the last byte ("tail"); the two may overlap.  The words
 * are converted to big endian so that an integer comparison orders
 * them the same way a bytewise comparison would, and are then
 * compared as one double-width number using a sub/sbb borrow chain.
 */
ENTRY(timingsafe_memcmp)
	cmp	$16, %rdx		# at least 17 bytes to process?
	ja	.Lgt16

	/* rdx <= 16 from here on, so the 32-bit edx holds the full length */
	cmp	$8, %edx		# at least 9 bytes to process?
	ja	.L0916

	cmp	$4, %edx		# at least 5 bytes to process?
	ja	.L0508

	cmp	$2, %edx		# at least 3 bytes to process?
	ja	.L0304

	test	%edx, %edx		# buffer empty?
	jnz	.L0102

	xor	%eax, %eax		# empty buffer always matches
	ret

	/*
	 * 1--2 bytes: form a 16-bit value per buffer with the first byte
	 * in bits 8--15 and the last byte in bits 0--7.  For a 1-byte
	 * buffer both are the same byte.  The upper 16 bits of eax and ecx
	 * are zero, so the 32-bit difference carries the right sign and is
	 * zero exactly when the buffers match.
	 */
.L0102:	movzbl	-1(%rdi, %rdx, 1), %eax	# load 1--2 bytes from first buffer
	movzbl	-1(%rsi, %rdx, 1), %ecx
	mov	(%rdi), %ah		# in big endian
	mov	(%rsi), %ch
	sub	%ecx, %eax
	ret

	/*
	 * 3--4 bytes: the tails end up in the upper halves of ecx/edx
	 * (bswap of a zero-extended word), the heads in ax/si with the
	 * upper halves of eax/esi clear.  After sub/sbb, eax is the high
	 * part of the double-width difference and is negative exactly when
	 * the first buffer is less.  setnz then makes the result nonzero
	 * on any mismatch without disturbing the sign bit.
	 */
.L0304:	movzwl	-2(%rdi, %rdx, 1), %ecx
	movzwl	-2(%rsi, %rdx, 1), %edx
	movzwl	(%rdi), %eax
	movzwl	(%rsi), %esi
	bswap	%ecx			# convert to big endian
	bswap	%edx			# ditto for edx, (e)ax, and (e)si
	rol	$8, %ax			# ROLW is used here so the upper two
	rol	$8, %si			# bytes stay clear, allowing us to
	sub	%edx, %ecx		# save a SBB compared to .L0508
	sbb	%esi, %eax
	or	%eax, %ecx		# nonzero if not equal
	setnz	%al
	ret

	/*
	 * 5--8 bytes: 32-bit head and tail.  The borrow out of the
	 * sub/sbb chain is set exactly when the first buffer is less;
	 * sbb %eax, %eax turns it into 0 or -1 before setnz fills in al.
	 */
.L0508:	mov	-4(%rdi, %rdx, 1), %ecx
	mov	-4(%rsi, %rdx, 1), %edx
	mov	(%rdi), %edi
	mov	(%rsi), %esi
	bswap	%ecx			# compare in big endian
	bswap	%edx
	bswap	%edi
	bswap	%esi
	sub	%edx, %ecx
	sbb	%esi, %edi
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%edi, %ecx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret

	/* 9--16 bytes: same scheme as .L0508 with 64-bit head and tail */
.L0916:	mov	-8(%rdi, %rdx, 1), %rcx
	mov	-8(%rsi, %rdx, 1), %rdx
	mov	(%rdi), %rdi
	mov	(%rsi), %rsi
	bswap	%rcx			# compare in big endian
	bswap	%rdx
	bswap	%rdi
	bswap	%rsi
	sub	%rdx, %rcx
	sbb	%rsi, %rdi
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%rdi, %rcx		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret

	/*
	 * compare 17+ bytes
	 *
	 * r8/r9 hold one 8-byte word from each buffer: the first pair of
	 * words found to differ, or, as long as none has been found, the
	 * most recently loaded (equal) pair.  A pair is only replaced
	 * while it compares equal, and always by cmov, so the same loads
	 * are issued regardless of the data.  rcx is the end offset of the
	 * 16-byte chunk the main loop handles next; the chunk at offset 0
	 * is handled right here.
	 */
.Lgt16:	mov	(%rdi), %r8		# process first 16 bytes
	mov	(%rsi), %r9
	mov	$32, %ecx
	cmp	%r8, %r9		# mismatch in head?
	cmove	8(%rdi), %r8		# if not, try second pair
	cmove	8(%rsi), %r9
	cmp	%rdx, %rcx
	jae	.Ltail			# at most 32 bytes: head and tail cover everything

	/* main loop processing 16 bytes per iteration */
	ALIGN_TEXT
0:	mov	-16(%rdi, %rcx, 1), %r10
	mov	-16(%rsi, %rcx, 1), %r11
	cmp	%r10, %r11		# mismatch in first pair?
	cmove	-8(%rdi, %rcx, 1), %r10	# if not, try second pair
	cmove	-8(%rsi, %rcx, 1), %r11
	cmp	%r8, %r9		# was there a mismatch previously?
	cmove	%r10, %r8		# apply new pair if there was not
	cmove	%r11, %r9
	add	$16, %rcx
	cmp	%rdx, %rcx
	jb	0b

	/*
	 * Final 16 bytes, possibly overlapping bytes already processed.
	 * r10/r11 always receive the last 8 bytes.  If no mismatch has
	 * been recorded, r8/r9 are replaced by the 8 bytes before those,
	 * making the comparison below one over the last 16 bytes.
	 * Otherwise r8 and r9 differ and, being the more significant
	 * words of the sub/sbb chain, decide the outcome on their own.
	 */
.Ltail:	mov	-8(%rdi, %rdx, 1), %r10
	mov	-8(%rsi, %rdx, 1), %r11
	cmp	%r8, %r9
	cmove	-16(%rdi, %rdx, 1), %r8
	cmove	-16(%rsi, %rdx, 1), %r9
	bswap	%r10			# compare in big endian
	bswap	%r11
	bswap	%r8
	bswap	%r9
	sub	%r11, %r10
	sbb	%r9, %r8
	sbb	%eax, %eax		# -1 if less, 0 if greater or equal
	or	%r10, %r8		# nonzero if not equal
	setnz	%al			# negative if <, 0 if =, 1 if >
	ret
END(timingsafe_memcmp)

	.section .note.GNU-stack,"",%progbits