/*-
 * Copyright (c) 2018 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * memcmp -- compare two byte ranges (AT&T syntax, x86-64).
 *
 * In:
 *	%rdi	first buffer
 *	%rsi	second buffer
 *	%rdx	number of bytes to compare
 * Out:
 *	%eax	0 when all compared bytes match; otherwise the zero-extended
 *		byte from the first buffer minus the zero-extended byte from
 *		the second buffer, taken at the lowest differing offset.
 * Scratch:
 *	%r8, %r9, flags; %rdi, %rsi and %rdx are modified as well.
 *
 * No stack is used and nothing is called.
 *
 * Strategy: pick a code path by length class.  Short inputs are checked
 * with a load from the start of the range and a load ending at its last
 * byte (the two may overlap).  Long inputs are walked 32 bytes per
 * iteration and the leftover is re-dispatched as a short input.  Once an
 * unequal chunk is known, it is narrowed to at most 4 bytes, which are
 * then compared one by one to produce the return value.
 */
ENTRY(memcmp)
	xorl	%eax,%eax		/* result stays 0 unless a difference is found */

	/* Entered at function start and again for the tail of the long loop. */
.Ldispatch:
	cmpq	$16,%rdx
	ja	.Lover_16

	/*
	 * %rdx <= 16 from here down to .Lret, so %dl holds the whole length
	 * and the byte-wide compares below see all of it.
	 */

	/* Length 8..16: first quadword and last quadword. */
	cmpb	$8,%dl
	jb	.Lunder_8
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	.Lnarrow_8
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	.Ldiff_last_8
	ret

	/* Length 4..7: first longword and last longword. */
.Lunder_8:
	cmpb	$4,%dl
	jb	.Lunder_4
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	.Lnarrow_8
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	.Ldiff_last_4
	ret

	/*
	 * Length 2..3: first word and last word.  The pointers are left
	 * untouched on a mismatch; the byte scan starts at offset 0 and
	 * reaches the differing byte before running past the range.
	 */
.Lunder_4:
	cmpb	$2,%dl
	jb	.Lunder_2
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	.Lnarrow_4
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	.Lnarrow_4
	ret

	/* Length 0..1: nothing to do, or a single byte difference. */
.Lunder_2:
	cmpb	$1,%dl
	jb	.Lret
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
.Lret:
	ret

	/* Length 17..32: two leading and two trailing quadwords. */
	ALIGN_TEXT
.Lover_16:
	cmpq	$32,%rdx
	ja	.Lloop_32
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	.Lnarrow_8
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	.Ldiff_second_8
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	.Ldiff_last_16
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	.Ldiff_last_8
	ret

	/*
	 * Length > 32: consume 32 bytes per iteration as two 16-byte halves.
	 * Each half is tested by subtracting the second buffer's quadwords
	 * from the first buffer's and OR-ing the two differences; a non-zero
	 * value means the half contains a mismatch.
	 */
	ALIGN_TEXT
.Lloop_32:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	.Ldiff_loop_lo16

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	.Ldiff_loop_hi16

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	.Lloop_32
	testb	%dl,%dl			/* fewer than 32 left, so %dl is the full count */
	jnz	.Ldispatch		/* handle the 1..31 byte tail */
	ret

/*
 * A differing chunk has been located.
 *
 * Each stub below moves both pointers to the start of that chunk; the
 * chunk is then shrunk (16 -> 8 -> 4 bytes) before the result is computed.
 */
	ALIGN_TEXT
.Ldiff_loop_hi16:			/* upper 16 bytes of a loop iteration */
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
.Ldiff_loop_lo16:			/* 16 bytes at the pointers: pick the 8-byte half */
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	.Lnarrow_8
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	.Lnarrow_8

	ALIGN_TEXT
.Ldiff_last_8:				/* quadword ending at the last byte */
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	.Lnarrow_8

	ALIGN_TEXT
.Ldiff_last_16:				/* quadword starting 16 bytes before the end */
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	.Lnarrow_8

	ALIGN_TEXT
.Ldiff_second_8:			/* quadword at offset 8 */
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	.Lnarrow_8

	ALIGN_TEXT
.Ldiff_last_4:				/* longword ending at the last byte */
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	.Lnarrow_4

	/*
	 * Pick a 4-byte window: keep the one at the pointers if it differs,
	 * otherwise step to the following 4 bytes.
	 */
	ALIGN_TEXT
.Lnarrow_8:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	.Lnarrow_4
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

	/*
	 * At most 4 bytes remain to be examined.  Bytes 0..2 are compared
	 * explicitly; if they all match, byte 3 is loaded and subtracted
	 * without a further test.
	 */
.Lnarrow_4:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	.Lresult

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	.Lresult

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	.Lresult

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
.Lresult:
	subl	%r8d,%eax		/* first-buffer byte minus second-buffer byte */
	ret
END(memcmp)

	.section .note.GNU-stack,"",%progbits