xref: /freebsd/lib/libc/amd64/string/strrchr.S (revision 40dbb06fa73cac37d57563c07e55efd0cabbd488)
/*-
 * Copyright (c) 2023 The FreeBSD Foundation
 * Copyright (c) 2026 Robert Clausecker <fuz@FreeBSD.org>
 *
 * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE
 */

#include <machine/asm.h>

#include "amd64_archlevel.h"

#define ALIGN_TEXT	.p2align 4,0x90	# 16-byte alignment, nop-filled

/* rindex is exported as a weak alias for strrchr */
	.weak	rindex
	.set	rindex, strrchr

/*
 * List of the strrchr implementations defined in this file.  The
 * ARCHFUNCS/ARCHFUNC/ENDARCHFUNCS macros come from amd64_archlevel.h;
 * presumably they build the table from which one variant is selected
 * at run time -- confirm against that header.
 */
ARCHFUNCS(strrchr)
	ARCHFUNC(strrchr, scalar)
	ARCHFUNC(strrchr, baseline)
ENDARCHFUNCS(strrchr)

/*
 * strrchr(s, c) -- scalar variant.
 *
 * Walks the string one aligned 8-byte word at a time using general
 * purpose registers only.  Whole aligned words are loaded, so bytes of
 * the first word that precede s and bytes of the last word that follow
 * the terminating NUL are read but masked out of the result.
 *
 * In:    %rdi = s, %sil = c (bits of %rsi above the low byte are ignored)
 * Out:   %rax = address of the last byte equal to c in s, where the
 *        terminating NUL counts as part of s; 0 if there is none
 * Uses:  %rcx, %rdx, %rsi, %rdi, %r8, %r9, %r10, %r11, flags; no stack
 *
 * Register roles after the setup code:
 *   %rsi  c replicated into all 8 bytes
 *   %r8   -0x0101010101010101, so that lea subtracts 0x01 from each byte
 *   %r9   0x8080808080808080
 *   %rcx  (str ^ c) of the most recently loaded word; a zero byte is a
 *         match.  It is converted into a match mask one step later.
 *   %rdi  address just past the word that %rcx belongs to
 *   %r11  byte swapped match mask of the last word that had a match;
 *         0 while no match has been seen
 *   %r10  value of %rdi that went with %r11, i.e. the address just past
 *         that word.  Only meaningful while %r11 != 0; until the first
 *         match it still holds the head mask.
 *
 * Zero bytes are located with (x - 0x01..01) & ~x & 0x80..80.  A borrow
 * out of a zero byte can set a spurious bit in the bytes above it, so
 * only the lowest set bit of such a mask is ever relied upon: the first
 * NUL for the string end and, after bswap, the last match in a word.
 */
ARCHENTRY(strrchr, scalar)
	mov	%edi, %ecx		# low bits of s give the misalignment
	and	$~7, %rdi		# align to 8 byte
	movzbl	%sil, %esi		# clear stray high bits
	movabs	$0x0101010101010101, %r8
	mov	(%rdi), %rax		# load first word
	imul	%r8, %rsi		# replicate char 8 times

	/*
	 * Unaligned input: align to 8 bytes.  Then proceed the same
	 * way as with aligned input, but prevent matches before the
	 * beginning of the string.  This is achieved by oring 0x01
	 * into each byte of the buffer before the string.
	 */
	shl	$3, %ecx		# byte offset -> bit offset (shift uses count mod 64)
	mov	%r8, %r10
	shl	%cl, %r10		# 0x01 where the string is
	xor	%r8, %r10		# 0x01 where it is not
	neg	%r8			# negate 01..01 so we can use lea
	movabs	$0x8080808080808080, %r9

	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	or	%r10, %rax		# ensure str != 0 before string
	or	%r10, %rcx		# ensure str^c != 0 before string
	xor	%r11, %r11		# vector of last match (0 -> no match)
	add	$8, %rdi		# advance to next iteration
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# NUL bytes in str, not including junk bits
	jnz	2f			# end of string?

	/*
	 * Main loop.  Each iteration loads the next word, then turns the
	 * (str ^ c) left over from the previous word into a match mask
	 * and records it, and finally checks the new word for NUL.
	 */
	ALIGN_TEXT
3:	mov	(%rdi), %rax		# str
	bswap	%rcx			# (str ^ c) in reverse order, to find last match
	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# matches in str, not including junk bits
	cmovnz	%rdi, %r10		# if match found, update match pointer
	cmovnz	%rcx, %r11		# ... and match vector

	add	$8, %rdi		# advance to next iteration
	mov	%rsi, %rcx
	xor	%rax, %rcx		# str ^ c
	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
	not	%rax			# ~str
	and	%rdx, %rax		# (str - 0x01..01) & ~str
	and	%r9, %rax		# NUL bytes in str, not including junk bits
	jz	3b			# end of string?

	/*
	 * NUL found, check for match in tail.  x ^ -x sets every bit
	 * above the lowest set bit of x, which here is bit 7 of the
	 * first NUL byte.  Oring that into (str ^ c) makes all bytes
	 * behind the NUL nonzero while leaving the NUL itself intact,
	 * so that searching for c == 0 still finds the terminator.
	 */
2:	mov	%rax, %rdx
	neg	%rax
	xor	%rdx, %rax		# all bytes behind the NUL byte
	or	%rax, %rcx		# (str ^ c) without matches behind NUL byte
	bswap	%rcx			# (str ^ c) in reverse order, to find last match
	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
	not	%rcx			# ~(str ^ c)
	and	%rdx, %rcx		# ((str ^ c) - 0x01..01) & ~(str ^ c)
	and	%r9, %rcx		# matches in str, not including junk bits
	cmovnz	%rdi, %r10		# if match found, update match pointer
	cmovnz	%rcx, %r11		# ... and match vector
	tzcnt	%r11, %rcx		# location of last match (unused if %r11 == 0)
	lea	-1(%r10), %rax		# address of last character in vector
	shr	$3, %ecx		# as byte offset
	sub	%rcx, %rax		# subtract character offset
	test	%r11, %r11		# was there actually a match?
	cmovz	%r11, %rax		# if not, return null pointer
	ret
ARCHEND(strrchr, scalar)

/*
 * strrchr(s, c) -- baseline variant.
 *
 * Walks the string in aligned 16-byte chunks using SSE2 instructions.
 * Whole aligned chunks are loaded, so bytes of the first chunk that
 * precede s and bytes of the last chunk that follow the terminating
 * NUL are read but masked out of the result.
 *
 * In:    %rdi = s, %esi = c (only the low byte is used)
 * Out:   %rax = address of the last byte equal to c in s, where the
 *        terminating NUL counts as part of s; 0 if there is none
 * Uses:  %rcx, %rdx, %rsi, %rdi, %r8, %r9, %xmm0, %xmm1, %xmm2, flags;
 *        no stack
 *
 * Register roles:
 *   %xmm0 c replicated into all 16 bytes
 *   %eax  NUL mask of the current chunk (one bit per byte)
 *   %ecx  match mask of the current chunk
 *   %edx  head only: mask of the bytes that belong to the string
 *   %rdi  address of the next chunk to load
 *   %r9   at the top of the loop, address of the chunk %ecx refers to
 *   %r8   address of the chunk holding the latest match, 0 if none
 *   %esi  match mask belonging to %r8.  It starts out as 1 so that the
 *         final bsr yields 0 and 0 + 0 gives a null pointer when no
 *         match was ever recorded.
 */
ARCHENTRY(strrchr, baseline)
	mov		%edi, %ecx
	and		$~0xf, %rdi		# align to 16 bytes
	movdqa		(%rdi), %xmm1
	movd		%esi, %xmm0
	and		$0xf, %ecx		# offset from alignment
	pxor		%xmm2, %xmm2
	mov		$-1, %edx
	punpcklbw	%xmm0, %xmm0		# c -> cc
	shl		%cl, %edx		# bits corresponding to bytes in the string
	punpcklwd	%xmm0, %xmm0		# cc -> cccc
	xor		%r8, %r8		# address of latest match
	mov		$1, %esi		# bit mask of latest match
	mov		%rdi, %r9		# candidate location for next match
	add		$16, %rdi		# advance to next chunk

	/* check for match in head */
	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
	pshufd		$0, %xmm0, %xmm0	# cccc -> cccccccccccccccc
	pcmpeqb		%xmm0, %xmm1		# c present?
	pmovmskb	%xmm2, %eax
	pmovmskb	%xmm1, %ecx
	and		%edx, %ecx		# c present in the string?
	and		%edx, %eax		# NUL present in the string?
	jnz		.Lend2

	/*
	 * Main loop unrolled twice.  Each half first records the match
	 * mask of the chunk before it (if nonzero) and then scans its own
	 * chunk; the mask of the chunk containing the NUL is handled at
	 * .Lend instead, where it is trimmed to the string.
	 */
	ALIGN_TEXT
0:	movdqa		(%rdi), %xmm1
	test		%ecx, %ecx		# was there a match in the last iter.?
	cmovnz		%r9, %r8		# remember match if any
	cmovnz		%ecx, %esi
	pxor		%xmm2, %xmm2
	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
	pcmpeqb		%xmm0, %xmm1		# c present?
	pmovmskb	%xmm2, %eax
	pmovmskb	%xmm1, %ecx
	test		%eax, %eax		# end of string in first half?
	jnz		.Lend

	movdqa		16(%rdi), %xmm1
	test		%ecx, %ecx		# was there a match in the last iter.?
	cmovnz		%rdi, %r8		# remember match if any
	cmovnz		%ecx, %esi
	pxor		%xmm2, %xmm2
	pcmpeqb		%xmm1, %xmm2		# NUL byte present?
	pcmpeqb		%xmm0, %xmm1		# c present?
	pmovmskb	%xmm2, %eax
	pmovmskb	%xmm1, %ecx
	lea		16(%rdi), %r9
	add		$32, %rdi
	test		%eax, %eax		# end of string in second half?
	jz		0b

	/*
	 * End of string.  On entry to .Lend2 %rdi is 16 past the chunk
	 * holding the NUL, on entry to .Lend it points at that chunk.
	 * x ^ (x - 1) keeps the bits up to and including the lowest set
	 * bit of the NUL mask, i.e. the string bytes and the terminator.
	 */
	ALIGN_TEXT
.Lend2:	sub		$16, %rdi
.Lend:	lea		-1(%rax), %edx
	xor		%edx, %eax		# mask of bytes in the string
	and		%eax, %ecx		# c found in the tail?
	cmovnz		%rdi, %r8
	cmovnz		%ecx, %esi
	bsr		%esi, %esi		# last location of c in (R8)
	lea		(%r8, %rsi, 1), %rax	# pointer to match
	ret
ARCHEND(strrchr, baseline)
	/* empty .note.GNU-stack section: no executable stack needed */
	.section .note.GNU-stack,"",%progbits
