/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

#define ALIGN_TEXT	.p2align 4,0x90	# 16-byte alignment, nop-filled

	/* rindex is provided as a weak alias of strrchr */
	.weak	rindex
	.set	rindex, strrchr

/*
 * List of implementations, in the order scalar, baseline.  The
 * ARCHFUNCS/ARCHFUNC/ARCHENTRY/ARCHEND macros come from
 * amd64_archlevel.h; NOTE(review): presumably they set up run-time
 * selection between the variants -- confirm against that header.
 */
ARCHFUNCS(strrchr)
	ARCHFUNC(strrchr, scalar)
	ARCHFUNC(strrchr, baseline)
ENDARCHFUNCS(strrchr)

/*
 * char *strrchr(const char *s, int c) -- scalar implementation.
 *
 * In:   RDI = s, ESI = c (only the low byte is used)
 * Out:  RAX = pointer to the last byte equal to (char)c in s, the
 *       terminating NUL counting as part of s, or NULL if there is none.
 * Uses: RAX, RCX, RDX, RSI, RDI, R8-R11, flags; no stack.
 *
 * The string is read in aligned 8-byte words, so loads may touch bytes
 * before the start of the string and after its terminator, but never
 * outside the aligned words containing those bytes.
 *
 * Zero bytes in a word x are located with
 *
 *	(x - 0x01..01) & ~x & 0x80..80
 *
 * The lowest set bit of this expression always marks a real zero byte;
 * higher bits can be spurious, as a borrow out of a zero byte turns an
 * adjacent 0x01 byte into a hit.  To find the LAST occurrence of c, the
 * expression is applied to the byte-swapped word str ^ c: the last
 * match is then the lowest, hence reliable, bit.
 *
 * Register roles once set up:
 *	RDI	address of the next word to load
 *	RSI	c replicated into all 8 bytes
 *	R8	-0x0101010101010101 (so that lea can subtract 0x01..01)
 *	R9	0x8080808080808080
 *	R10	address of the word holding the most recent match
 *	R11	match bits of that word (byte-swapped), 0 if no match yet
 */
ARCHENTRY(strrchr, scalar)
	mov	%edi, %ecx
	and	$~7, %rdi		# align to 8 byte
	movzbl	%sil, %esi		# clear stray high bits
	movabs	$0x0101010101010101, %r8
	mov	(%rdi), %rax		# load first word
	imul	%r8, %rsi		# replicate char 8 times

	/*
	 * Unaligned input: align to 8 bytes.  Then proceed the same
	 * way as with aligned input, but prevent matches before the
	 * beginning of the string.  This is achieved by oring 0x01
	 * into each byte of the buffer before the string.
	 */
	shl	$3, %ecx		# misalignment in bits (count taken mod 64)
	mov	%r8, %r10
	shl	%cl, %r10		# 0x01 where the string is
	xor	%r8, %r10		# 0x01 where it is not
	neg	%r8			# negate 01..01 so we can use lea
	movabs	$0x8080808080808080, %r9

	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	or	%r10, %rax		# ensure str != 0 before string
	or	%r10, %rcx		# ensure str^c != 0 before string
	bswap	%rcx			# in reverse order, to find last match
	mov	%rdi, %r10		# location of initial match (if any)
	xor	%r11, %r11		# initial match (none)
	add	$8, %rdi		# advance to next iteration
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# not including junk bits
	jnz	1f			# end of string?

	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# not including junk bits
	mov	%rcx, %r11		# remember match in head
	jmp	0f

	/* main loop unrolled twice */
	ALIGN_TEXT
3:	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# not including junk bits
	lea	-8(%rdi), %rdx		# address of the word just examined
	cmovnz	%rdx, %r10		# remember location of current match
	cmovnz	%rcx, %r11

0:	mov	(%rdi), %rax		# str
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	bswap	%rcx			# in reverse order, to find last match
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# not including junk bits
	jnz	2f			# end of string?

	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# not including junk bits
	cmovnz	%rdi, %r10		# remember location of current match
	cmovnz	%rcx, %r11

	mov	8(%rdi), %rax		# str
	add	$16, %rdi
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	bswap	%rcx			# in reverse order, to find last match
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# not including junk bits
	jz	3b			# end of string?

	/*
	 * NUL found.  RAX holds the NUL indicator bits of the final
	 * word, RCX the byte-swapped str ^ c of that word.  Bytes past
	 * the terminator must not take part in the search for c.  They
	 * are forced to 0xff BEFORE the zero-byte test: masking the
	 * result afterwards is not sufficient, as a byte equal to c
	 * past the terminator could otherwise send a borrow into the
	 * terminator and produce a spurious match there when c == 1.
	 */
1:	sub	$8, %rdi		# undo advance past buffer
2:	lea	-1(%rax), %rdx
	xor	%rdx, %rax		# mask of bytes in the string (incl. NUL)
	not	%rax			# mask of bytes past the NUL
	bswap	%rax			# in reverse order, same as RCX
	or	%rax, %rcx		# ensure str^c != 0 past the NUL
	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# c found in the tail?
	cmovnz	%rdi, %r10
	cmovnz	%rcx, %r11
	bswap	%r11			# unreverse byte order
	bsr	%r11, %rcx		# last location of c in (R10)
	shr	$3, %rcx		# as byte offset
	lea	(%r10, %rcx, 1), %rax	# pointer to match
	test	%r11, %r11		# was there actually a match?
	cmovz	%r11, %rax		# if not, return null pointer
	ret
ARCHEND(strrchr, scalar)

/*
 * char *strrchr(const char *s, int c) -- baseline (SSE2) implementation.
 *
 * In:   RDI = s, ESI = c (only the low byte is used)
 * Out:  RAX = pointer to the last byte equal to (char)c in s, the
 *       terminating NUL counting as part of s, or NULL if there is none.
 * Uses: RAX, RCX, RDX, RSI, RDI, R8, R9, XMM0-XMM2, flags; no stack.
 *
 * The string is read in aligned 16-byte chunks, so loads may touch
 * bytes before the start of the string and after its terminator, but
 * never outside the aligned chunks containing those bytes.
 *
 * Register roles once set up:
 *	RDI	address of the next chunk to load
 *	XMM0	c replicated into all 16 bytes
 *	R8	address of the chunk holding the latest recorded match,
 *		initially 0
 *	ESI	match bit mask of that chunk, initially 1; with R8 == 0
 *		this makes the final bsr/lea yield NULL if c never occurs
 *	R9	address of the chunk whose match mask is pending in ECX
 *		at the top of the loop
 */
ARCHENTRY(strrchr, baseline)
	mov	%edi, %ecx
	and	$~0xf, %rdi		# align to 16 bytes
	movdqa	(%rdi), %xmm1
	movd	%esi, %xmm0
	and	$0xf, %ecx		# offset from alignment
	pxor	%xmm2, %xmm2
	mov	$-1, %edx
	punpcklbw %xmm0, %xmm0		# c -> cc
	shl	%cl, %edx		# bits corresponding to bytes in the string
	punpcklwd %xmm0, %xmm0		# cc -> cccc
	xor	%r8, %r8		# address of latest match
	mov	$1, %esi		# bit mask of latest match
	mov	%rdi, %r9		# candidate location for next match
	add	$16, %rdi		# advance to next chunk

	/* check for match in head */
	pcmpeqb	%xmm1, %xmm2		# NUL byte present?
	pshufd	$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
	pcmpeqb	%xmm0, %xmm1		# c present?
	pmovmskb %xmm2, %eax
	pmovmskb %xmm1, %ecx
	and	%edx, %ecx		# c present in the string?
	and	%edx, %eax		# NUL present in the string?
	jnz	.Lend2

	/* main loop unrolled twice */
	ALIGN_TEXT
0:	movdqa	(%rdi), %xmm1
	test	%ecx, %ecx		# was there a match in the last iter.?
	cmovnz	%r9, %r8		# remember match if any
	cmovnz	%ecx, %esi
	pxor	%xmm2, %xmm2
	pcmpeqb	%xmm1, %xmm2		# NUL byte present?
	pcmpeqb	%xmm0, %xmm1		# c present?
	pmovmskb %xmm2, %eax
	pmovmskb %xmm1, %ecx
	test	%eax, %eax		# end of string in first half?
	jnz	.Lend

	movdqa	16(%rdi), %xmm1
	test	%ecx, %ecx		# was there a match in the last iter.?
	cmovnz	%rdi, %r8		# remember match if any
	cmovnz	%ecx, %esi
	pxor	%xmm2, %xmm2
	pcmpeqb	%xmm1, %xmm2		# NUL byte present?
	pcmpeqb	%xmm0, %xmm1		# c present?
	pmovmskb %xmm2, %eax
	pmovmskb %xmm1, %ecx
	lea	16(%rdi), %r9		# chunk the pending mask in ECX belongs to
	add	$32, %rdi
	test	%eax, %eax		# end of string in second half?
	jz	0b

	/*
	 * NUL found.  At .Lend, RDI is the address of the final chunk,
	 * EAX its NUL mask and ECX its match mask.  .Lend2 is entered
	 * with RDI already advanced past that chunk.
	 */
	ALIGN_TEXT
.Lend2:	sub	$16, %rdi		# undo advance past final chunk
.Lend:	lea	-1(%rax), %edx
	xor	%edx, %eax		# mask of bytes in the string
	and	%eax, %ecx		# c found in the tail?
	cmovnz	%rdi, %r8
	cmovnz	%ecx, %esi
	bsr	%esi, %esi		# last location of c in (R8)
	lea	(%r8, %rsi, 1), %rax	# pointer to match
	ret
ARCHEND(strrchr, baseline)

	.section .note.GNU-stack,"",%progbits