/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023, 2025 Robert Clausecker <fuz@FreeBSD.org>
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

#define ALIGN_TEXT	.p2align 4, 0x90

/*
 * void *memrchr(const void *b, int c, size_t len)
 *
 * Find the last occurrence of the byte c within the len bytes starting
 * at b.
 *
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 * ABI:    System V AMD64
 * In:     %rdi = b, %sil = c (only the low byte is compared), %rdx = len
 * Out:    %rax = pointer to the last matching byte, or 0 if none
 *
 * Two implementations follow.  The ARCHFUNCS/ARCHFUNC/ARCHENTRY/ARCHEND
 * macros come from amd64_archlevel.h, which is not visible here;
 * presumably they register both variants so that one can be selected at
 * run time -- confirm against that header.
 */
ARCHFUNCS(memrchr)
	ARCHFUNC(memrchr, scalar)
	ARCHFUNC(memrchr, baseline)
ENDARCHFUNCS(memrchr)

/*
 * Scalar variant: walk backwards from the last byte, four bytes per
 * loop iteration, then handle the 0..3 bytes left over.
 *
 * Register use:
 *   %rax  pointer to the byte currently treated as "last"
 *   %rdx  number of unprocessed bytes minus 4
 * Clobbers: %rax, %rdx, flags.
 */
ARCHENTRY(memrchr, scalar)
	lea	-1(%rdi, %rdx, 1), %rax	# point to last char in buffer
	sub	$4, %rdx		# 4 bytes left to process?
	jb	.Ltail

	ALIGN_TEXT
0:	cmp	%sil, (%rax)		# match at last entry?
	je	1f

	cmp	%sil, -1(%rax)		# match at second to last entry?
	je	2f

	cmp	%sil, -2(%rax)		# match at third to last entry?
	je	3f

	cmp	%sil, -3(%rax)		# match at fourth to last entry?
	je	4f

	sub	$4, %rax
	sub	$4, %rdx
	jae	0b

	/*
	 * Here %edx is one of -4, -3, -2, -1, meaning 0, 1, 2 or 3 bytes
	 * remain.  The unsigned comparisons below peel them off one by one.
	 */
.Ltail:	cmp	$-3, %edx		# at least one character left to process?
	jb	.Lnotfound

	cmp	%sil, (%rax)
	je	1f

	cmp	$-2, %edx		# at least two characters left to process?
	jb	.Lnotfound

	cmp	%sil, -1(%rax)
	je	2f

	cmp	$-1, %edx		# at least three characters left to process?
	jb	.Lnotfound

	cmp	%sil, -2(%rax)
	je	3f

.Lnotfound:
	xor	%eax, %eax		# return null pointer
	ret

	/* match found -- adjust rax to point to matching byte */
4:	dec	%rax
3:	dec	%rax
2:	dec	%rax
1:	ret
ARCHEND(memrchr, scalar)

/*
 * Baseline variant (SSE2 instructions): process the buffer backwards in
 * 32-byte aligned chunks using aligned 16-byte loads.  The first and
 * last chunk may contain bytes outside the buffer; matches on those
 * bytes are removed with bit masks before a result is derived.
 *
 * Register use:
 *   %rdx   address of the 32-byte aligned chunk being examined
 *   %xmm2  the search byte replicated into all 16 lanes
 *   %r8d   mask with zero bits for chunk bytes behind the buffer end
 *   %r9d   mask with zero bits for chunk bytes before the buffer start
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %xmm0-%xmm2, flags.
 */
ARCHENTRY(memrchr, baseline)
	test		%rdx, %rdx		# empty input?
	je		.Lnomatchb


	lea		(%rdi, %rdx, 1), %ecx	# pointer to end of buffer
	lea		-1(%rdi, %rdx, 1), %rdx	# pointer to last char in buffer
	movd		%esi, %xmm2
	and		$~0x1f, %rdx		# pointer to final 32 buffer bytes
	movdqa		(%rdx), %xmm0		# load last 32 bytes
	movdqa		16(%rdx), %xmm1

	punpcklbw	%xmm2, %xmm2		# c -> cc

	/* only the low 5 bits of %cl are used as the 32-bit shift count */
	mov		$-1, %r8d
	neg		%ecx
	mov		%r8d, %r9d
	shr		%cl, %r8d		# mask with zeroes after the string

	punpcklwd	%xmm2, %xmm2		# cc -> cccc

	mov		%edi, %ecx
	mov		%r9d, %eax
	shl		%cl, %r9d		# mask with zeroes before the string

	pshufd		$0, %xmm2, %xmm2	# cccc -> cccccccccccccccc

	cmp		%rdx, %rdi		# tail is beginning of buffer?
	cmovae		%r9d, %eax		# if yes, do combined head/tail processing
	and		%r8d, %eax		# mask of bytes in tail part of string

	/* process tail */
	pcmpeqb		%xmm2, %xmm1
	pcmpeqb		%xmm2, %xmm0
	pmovmskb	%xmm1, %esi
	pmovmskb	%xmm0, %ecx
	shl		$16, %esi
	or		%esi, %ecx		# locations of matches
	and		%ecx, %eax		# any match inside buffer?
	jnz		.Lprecisematchb

	cmp		%rdx, %rdi		# did the buffer begin here?
	jae		.Lnomatchb		# if yes, we are done

	/* main loop */
	ALIGN_TEXT
0:	movdqa		-32(%rdx), %xmm0	# load previous string chunk
	movdqa		-16(%rdx), %xmm1
	sub		$32, %rdx		# beginning of string reached?
	cmp		%rdx, %rdi
	jae		.Ltailb

	pcmpeqb		%xmm2, %xmm0
	pcmpeqb		%xmm2, %xmm1
	por		%xmm1, %xmm0		# match in either half?
	pmovmskb	%xmm0, %eax
	test		%eax, %eax
	jz		0b

	/*
	 * The por above overwrote %xmm0, so the low half is compared again
	 * from memory; %xmm1 still holds the result for the high half.
	 */
.Lmatchb:
	pcmpeqb		(%rdx), %xmm2		# redo comparison of first 16 bytes
	pmovmskb	%xmm1, %ecx
	pmovmskb	%xmm2, %eax
	shl		$16, %ecx
	or		%ecx, %eax		# location of matches

	/* %eax is a nonzero match mask relative to the chunk at %rdx */
.Lprecisematchb:
	bsr		%eax, %eax		# find location of match
	add		%rdx, %rax		# point to matching byte
	ret

	/*
	 * Chunk containing the start of the buffer.  If the masked result
	 * is zero, %rax is already 0 and bsr sets ZF, so the cmovnz below
	 * leaves the null pointer in place.  lea does not modify flags.
	 */
.Ltailb:
	pcmpeqb		%xmm2, %xmm1
	pcmpeqb		%xmm2, %xmm0
	pmovmskb	%xmm1, %ecx
	pmovmskb	%xmm0, %eax
	shl		$16, %ecx
	or		%ecx, %eax		# location of matches
	and		%r9d, %eax		# mask out matches before buffer
	bsr		%eax, %edi		# location of match
	lea		(%rdx, %rdi, 1), %rdx	# pointer to match (if any)
	cmovnz		%rdx, %rax		# point to match if present,
	ret					# else null pointer

.Lnomatchb:
	xor		%eax, %eax		# return null pointer
	ret
ARCHEND(memrchr, baseline)

	.section .note.GNU-stack, "", %progbits