/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023, 2025 Robert Clausecker <fuz@FreeBSD.org>
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

#define ALIGN_TEXT	.p2align 4, 0x90

/*
 * List of the memrchr implementations defined in this file.  The
 * ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS macros are provided by
 * amd64_archlevel.h.
 * NOTE(review): presumably this table is what selects one of the variants
 * for the public memrchr symbol -- confirm against amd64_archlevel.h.
 */
ARCHFUNCS(memrchr)
	ARCHFUNC(memrchr, scalar)
	ARCHFUNC(memrchr, baseline)
ENDARCHFUNCS(memrchr)

/*
 * memrchr, scalar variant.
 *
 * Registers as used by this routine:
 *   %rdi  current position in the buffer (walks forward)
 *   %sil  byte value searched for
 *   %rdx  number of bytes left, kept biased by -4
 *   %rax  result: address of the latest match seen so far, or 0
 *
 * The buffer is scanned from its start to its end, four bytes per loop
 * iteration.  Every later match overwrites %rax, so the value returned
 * is the address of the last matching byte, or 0 if there is none.
 * No branch depends on the buffer contents; matches are selected with
 * conditional moves only.
 */
ARCHENTRY(memrchr, scalar)
	xor	%eax, %eax		# prospective return value
	sub	$4, %rdx		# 4 bytes left to process?
	jb	1f

	/*
	 * Per iteration, find the highest-addressed match among the four
	 * bytes at 0..3(%rdi):
	 *   %r8   match at +0, else 0
	 *   %r9   match at +1, else %r8
	 *   %r10  match at +2, else 0
	 *   %r11  match at +3, else %r10; falls back to %r9 if still 0
	 */
	ALIGN_TEXT
0:	xor	%r8, %r8
	lea	2(%rdi), %r10
	cmp	%sil, 2(%rdi)
	cmovne	%r8, %r10		# point to null if no match

	cmp	%sil, (%rdi)
	cmove	%rdi, %r8		# point to first char if match

	lea	1(%rdi), %r9
	cmp	%sil, 1(%rdi)
	cmovne	%r8, %r9		# point to first result if no match in second

	lea	3(%rdi), %r11
	cmp	%sil, 3(%rdi)
	cmovne	%r10, %r11		# point to third result if no match in fourth

	test	%r11, %r11
	cmovz	%r9, %r11		# take first pair match if none in second

	test	%r11, %r11		# retest: cmovz above may have changed %r11
	cmovnz	%r11, %rax		# take match in current set if any

	add	$4, %rdi
	sub	$4, %rdx
	jae	0b

	/*
	 * Here %rdx is (bytes left - 4), i.e. in the range -4..-1.  The
	 * unsigned compares of %edx against -3, -2 and -1 test for at
	 * least 1, 2 and 3 bytes left, respectively.
	 */
1:	cmp	$-3, %edx		# at least one character left to process?
	jb	2f

	cmp	%sil, (%rdi)
	cmove	%rdi, %rax

	lea	1(%rdi), %rcx
	cmp	$-2, %edx		# at least two characters left to process?
	jb	2f

	cmp	%sil, 1(%rdi)
	cmove	%rcx, %rax

	lea	2(%rdi), %rcx
	cmp	$-1, %edx		# at least three characters left to process?
	jb	2f

	cmp	%sil, 2(%rdi)
	cmove	%rcx, %rax

2:	ret
ARCHEND(memrchr, scalar)

/*
 * memrchr, baseline variant (SSE2 instructions on XMM registers).
 *
 * Registers as used by this routine:
 *   %rdi   start of the buffer
 *   %esi   byte value searched for (low byte); reused as scratch later
 *   %rdx   on entry the length; afterwards the address of the 32-byte
 *          aligned chunk currently being examined
 *   %xmm2  search byte replicated into all 16 byte lanes
 *   %r8d   bit mask with zeroes for chunk bytes after the end of the buffer
 *   %r9d   bit mask with zeroes for chunk bytes before the start of the buffer
 *   %rax   match bit mask, then the result
 *
 * The buffer is examined in 32-byte aligned chunks, starting with the
 * chunk holding the last buffer byte and moving toward lower addresses.
 * The aligned loads fetch whole chunks, so the first and last chunk can
 * contain bytes outside the buffer; matches on those bytes are removed
 * with the masks in %r8d and %r9d.  In a match mask, bit i corresponds
 * to the byte at offset i in the chunk, so bsr yields the offset of the
 * highest-addressed match.  Returns 0 if there is no match or the length
 * is zero.
 */
ARCHENTRY(memrchr, baseline)
	test		%rdx, %rdx		# empty input?
	je		.Lnomatchb


	lea		(%rdi, %rdx, 1), %ecx	# pointer to end of buffer
	lea		-1(%rdi, %rdx, 1), %rdx	# pointer to last char in buffer
	movd		%esi, %xmm2
	and		$~0x1f, %rdx		# pointer to final 32 buffer bytes
	movdqa		(%rdx), %xmm0		# load last 32 bytes
	movdqa		16(%rdx), %xmm1

	punpcklbw	%xmm2, %xmm2		# c -> cc

	/*
	 * Only the low bits of %cl are used as the shift count (it is
	 * taken modulo 32 for 32-bit operands), so shifting by the negated
	 * end address resp. by the start address gives the masks relative
	 * to a 32-byte aligned chunk.
	 */
	mov		$-1, %r8d
	neg		%ecx
	mov		%r8d, %r9d
	shr		%cl, %r8d		# mask with zeroes after the string

	punpcklwd	%xmm2, %xmm2		# cc -> cccc

	mov		%edi, %ecx
	mov		%r9d, %eax		# all ones; copied before %r9d is shifted
	shl		%cl, %r9d		# mask with zeroes before the string

	pshufd		$0, %xmm2, %xmm2	# cccc -> cccccccccccccccc

	cmp		%rdx, %rdi		# tail is beginning of buffer?
	cmovae		%r9d, %eax		# if yes, do combined head/tail processing
	and		%r8d, %eax		# mask of bytes in tail part of string

	/* process tail */
	pcmpeqb		%xmm2, %xmm1
	pcmpeqb		%xmm2, %xmm0
	pmovmskb	%xmm1, %esi
	pmovmskb	%xmm0, %ecx
	shl		$16, %esi		# upper 16 bytes -> bits 16..31
	or		%esi, %ecx		# locations of matches
	and		%ecx, %eax		# any match inside buffer?
	jnz		.Lprecisematchb

	cmp		%rdx, %rdi		# did the buffer begin here?
	jae		.Lnomatchb		# if yes, we are done

	/*
	 * main loop: step back one chunk at a time.  The chunk is loaded
	 * before the bounds check; if it contains the start of the buffer
	 * it is handed to .Ltailb with the raw data still in %xmm0/%xmm1.
	 */
	ALIGN_TEXT
0:	movdqa		-32(%rdx), %xmm0	# load previous string chunk
	movdqa		-16(%rdx), %xmm1
	sub		$32, %rdx		# beginning of string reached?
	cmp		%rdx, %rdi
	jae		.Ltailb

	pcmpeqb		%xmm2, %xmm0
	pcmpeqb		%xmm2, %xmm1
	por		%xmm1, %xmm0		# match in either half?
	pmovmskb	%xmm0, %eax
	test		%eax, %eax
	jz		0b

	/*
	 * A match exists somewhere in this chunk.  %xmm0 was overwritten
	 * by the por above, so the lower half is compared again straight
	 * from memory; %xmm1 still holds the result for the upper half.
	 */
.Lmatchb:
	pcmpeqb		(%rdx), %xmm2		# redo comparison of first 16 bytes
	pmovmskb	%xmm1, %ecx
	pmovmskb	%xmm2, %eax
	shl		$16, %ecx
	or		%ecx, %eax		# location of matches

	/* %eax: nonzero match mask for the chunk at %rdx */
.Lprecisematchb:
	bsr		%eax, %eax		# find location of match
	add		%rdx, %rax		# point to matching byte
	ret

	/*
	 * Chunk containing the start of the buffer (reached from the main
	 * loop only).  If no match survives the masking, %eax is zero and
	 * the 32-bit write has cleared all of %rax, which is then returned
	 * unchanged.  The lea does not modify the flags, so cmovnz still
	 * sees ZF as left by bsr (set when its source operand was zero).
	 */
.Ltailb:
	pcmpeqb		%xmm2, %xmm1
	pcmpeqb		%xmm2, %xmm0
	pmovmskb	%xmm1, %ecx
	pmovmskb	%xmm0, %eax
	shl		$16, %ecx
	or		%ecx, %eax		# location of matches
	and		%r9d, %eax		# mask out matches before buffer
	bsr		%eax, %edi		# location of match
	lea		(%rdx, %rdi, 1), %rdx	# pointer to match (if any)
	cmovnz		%rdx, %rax		# point to match if present,
	ret					# else null pointer

.Lnomatchb:
	xor		%eax, %eax		# return null pointer
	ret
ARCHEND(memrchr, baseline)

	.section .note.GNU-stack, "", %progbits
