/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 * Copyright (c) 2026 Robert Clausecker <fuz@FreeBSD.org>
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

#define	ALIGN_TEXT	.p2align 4,0x90	# 16-byte alignment, nop-filled

/*
 * char *strrchr(const char *s, int c)
 *
 * Return a pointer to the last occurrence of (char)c in the
 * NUL-terminated string s, or a null pointer if there is none.  The
 * terminating NUL counts as part of the string, so c == 0 yields a
 * pointer to the terminator.
 *
 * Syntax:	AT&T (GAS), run through the C preprocessor.
 * In:		%rdi = s, %esi = c (only the low byte is used)
 * Out:		%rax = pointer to the last match, or 0
 *
 * Two implementations are provided below.  ARCHFUNCS/ARCHFUNC/
 * ARCHENTRY/ARCHEND come from amd64_archlevel.h; presumably they build
 * the table from which one variant is selected at run time (confirm
 * against that header).
 *
 * Both variants only ever issue naturally aligned loads.  This means
 * they read bytes in front of s and behind the terminating NUL, but
 * only within the aligned word or chunk that also holds string bytes.
 */

	/* rindex is a weak alias for strrchr */
	.weak	rindex
	.set	rindex, strrchr

ARCHFUNCS(strrchr)
	ARCHFUNC(strrchr, scalar)
	ARCHFUNC(strrchr, baseline)
ENDARCHFUNCS(strrchr)

/*
 * Scalar variant: processes the string 8 bytes at a time in general
 * purpose registers.
 *
 * Zero bytes in a word x are located with
 *
 *	(x - 0x01..01) & ~x & 0x80..80
 *
 * which has bit 7 set in the lowest zero byte of x.  Marker bits above
 * the lowest one can be spurious (a borrow from a zero byte makes an
 * adjacent 0x01 byte look like zero), so only the lowest marker is ever
 * relied upon.  For the NUL search that is exactly the first NUL.  For
 * the character search, str ^ c is byte swapped first so that the last
 * matching byte of the word becomes the lowest one.
 *
 * Register roles:
 *	%rdi	address of the next word to load
 *	%rsi	c replicated into all 8 bytes
 *	%r8	-0x0101010101010101 (so lea can subtract 0x01..01)
 *	%r9	0x8080808080808080
 *	%r10	in the head: 0x01 in each byte preceding the string;
 *		afterwards: address just past the word of the last match
 *	%r11	byte swapped match vector of that word (0 -> no match yet)
 *	%rax, %rcx, %rdx	scratch
 */
ARCHENTRY(strrchr, scalar)
	mov	%edi, %ecx
	and	$~7, %rdi		# align to 8 byte
	movzbl	%sil, %esi		# clear stray high bits
	movabs	$0x0101010101010101, %r8
	mov	(%rdi), %rax		# load first word
	imul	%r8, %rsi		# replicate char 8 times

	/*
	 * Unaligned input: align to 8 bytes.  Then proceed the same
	 * way as with aligned input, but prevent matches before the
	 * beginning of the string.  This is achieved by oring 0x01
	 * into each byte of the buffer before the string.
	 *
	 * Only the low 6 bits of %cl take part in a 64 bit shift, so
	 * after the multiplication by 8 the shift count is the bit
	 * offset of the string within the first word.
	 */
	shl	$3, %ecx
	mov	%r8, %r10
	shl	%cl, %r10		# 0x01 where the string is
	xor	%r8, %r10		# 0x01 where it is not
	neg	%r8			# negate 01..01 so we can use lea
	movabs	$0x8080808080808080, %r9

	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	or	%r10, %rax		# ensure str != 0 before string
	or	%r10, %rcx		# ensure str^c != 0 before string
	xor	%r11, %r11		# vector of last match (0 -> no match)
	add	$8, %rdi		# advance to next iteration
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# NUL bytes in str, not including junk bits
	jnz	2f			# end of string?

	/*
	 * Main loop.  On entry %rcx holds str ^ c of the previous word
	 * (which is known to hold no NUL) and %rdi points just past
	 * that word, i.e. at the word loaded in this iteration.
	 */
	ALIGN_TEXT
3:	mov	(%rdi), %rax		# str
	bswap	%rcx			# (str ^ c) in reverse order, to find last match
	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# matches in str, not including junk bits
	cmovnz	%rdi, %r10		# if match found, update match pointer
	cmovnz	%rcx, %r11		# ... and match vector

	add	$8, %rdi		# advance to next iteration
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# NUL bytes in str, not including junk bits
	jz	3b			# end of string?

	/*
	 * NUL found, check for match in tail.  %rax ^ -%rax has all
	 * bits above the lowest set bit of %rax set, which covers
	 * exactly the bytes behind the first NUL byte.  The NUL byte
	 * itself stays eligible so that c == 0 matches the terminator.
	 */
2:	mov	%rax, %rdx
	neg	%rax
	xor	%rdx, %rax		# all bytes behind the NUL byte
	or	%rax, %rcx		# (str ^ c) without matches behind NUL byte
	bswap	%rcx			# (str ^ c) in reverse order, to find last match
	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# matches in str, not including junk bits
	cmovnz	%rdi, %r10		# if match found, update match pointer
	cmovnz	%rcx, %r11		# ... and match vector

	/*
	 * The lowest marker in the byte swapped vector sits at bit
	 * 8 * k + 7, where k counts bytes back from the end of the
	 * word.  The match is thus at (%r10 - 1) - k.  If %r11 is
	 * zero, the value computed here is discarded by the cmovz.
	 */
	tzcnt	%r11, %rcx		# location of last match
	lea	-1(%r10), %rax		# address of last character in vector
	shr	$3, %ecx		# as byte offset
	sub	%rcx, %rax		# subtract character offset
	test	%r11, %r11		# was there actually a match?
	cmovz	%r11, %rax		# if not, return null pointer
	ret
ARCHEND(strrchr, scalar)

/*
 * Baseline variant: processes the string in aligned 16 byte chunks
 * using SSE2 byte compares and pmovmskb bit masks.
 *
 * Register roles:
 *	%xmm0	c replicated into all 16 bytes
 *	%xmm1	current chunk, then its "equal to c" mask
 *	%xmm2	"equal to NUL" mask of the current chunk
 *	%eax	bit mask of NUL bytes in the current chunk
 *	%ecx	bit mask of matches in the current chunk
 *	%rdi	address of the next chunk to load
 *	%r9	address of the chunk whose match mask is pending in %ecx
 *		at the top of the loop
 *	%r8	address of the chunk holding the latest match
 *	%esi	bit mask of matches within that chunk
 *
 * %r8 starts out as 0 and %esi as 1.  If no match is ever recorded,
 * the final bsr yields 0 and the result is 0 + 0, a null pointer,
 * without any extra branch.
 */
ARCHENTRY(strrchr, baseline)
	mov		%edi, %ecx
	and		$~0xf, %rdi		# align to 16 bytes
	movdqa		(%rdi), %xmm1
	movd		%esi, %xmm0
	and		$0xf, %ecx		# offset from alignment
	pxor		%xmm2, %xmm2
	mov		$-1, %edx
	punpcklbw	%xmm0, %xmm0		# c -> cc
	shl		%cl, %edx		# bits corresponding to bytes in the string
	punpcklwd	%xmm0, %xmm0		# cc -> cccc
	xor		%r8, %r8		# address of latest match
	mov		$1, %esi		# bit mask of latest match
	mov		%rdi, %r9		# candidate location for next match
	add		$16, %rdi		# advance to next chunk

	/* check for match in head */
	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
	pshufd		$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
	pcmpeqb		%xmm0, %xmm1		# c present?
	pmovmskb	%xmm2, %eax
	pmovmskb	%xmm1, %ecx
	and		%edx, %ecx		# c present in the string?
	and		%edx, %eax		# NUL present in the string?
	jnz		.Lend2

	/*
	 * Main loop unrolled twice.  Each half first commits the
	 * matches of the previous chunk (which held no NUL) and then
	 * scans the next chunk.
	 */
	ALIGN_TEXT
0:	movdqa		(%rdi), %xmm1
	test		%ecx, %ecx		# was there a match in the last iter.?
	cmovnz		%r9, %r8		# remember match if any
	cmovnz		%ecx, %esi
	pxor		%xmm2, %xmm2
	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
	pcmpeqb		%xmm0, %xmm1		# c present?
	pmovmskb	%xmm2, %eax
	pmovmskb	%xmm1, %ecx
	test		%eax, %eax		# end of string in first half?
	jnz		.Lend

	movdqa		16(%rdi), %xmm1
	test		%ecx, %ecx		# was there a match in the last iter.?
	cmovnz		%rdi, %r8		# remember match if any
	cmovnz		%ecx, %esi
	pxor		%xmm2, %xmm2
	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
	pcmpeqb		%xmm0, %xmm1		# c present?
	pmovmskb	%xmm2, %eax
	pmovmskb	%xmm1, %ecx
	lea		16(%rdi), %r9
	add		$32, %rdi
	test		%eax, %eax		# end of string in second half?
	jz		0b

	/*
	 * End of string.  At .Lend2, %rdi points one chunk past the
	 * chunk holding the NUL; at .Lend it points at that chunk.
	 */
	ALIGN_TEXT
.Lend2:	sub		$16, %rdi
.Lend:	lea		-1(%rax), %edx
	xor		%edx, %eax		# mask of bytes up to and including the NUL
	and		%eax, %ecx		# c found in the tail?
	cmovnz		%rdi, %r8
	cmovnz		%ecx, %esi
	bsr		%esi, %esi		# last location of c in (R8)
	lea		(%r8, %rsi, 1), %rax	# pointer to match
	ret
ARCHEND(strrchr, baseline)

	.section .note.GNU-stack,"",%progbits