xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S (revision 31914882fca502069810b9e9ddea4bcd8136a4f4)
/*
 * strrchr - find the last occurrence of a character in a string
 *
 * Copyright (c) 2019-2021, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

#include "../asmdefs.h"

#if __ARM_FEATURE_SVE
/* Assumptions:
 *
 * ARMv8-a, AArch64
 * SVE Available.
 */

/* strrchr (s, c), SVE implementation.
 *
 * In:   x0 = s, w1 = c (only the low byte is used, via dup z1.b).
 * Out:  x0 = address of the last byte equal to c at or before the
 *       terminating NUL (the NUL itself matches when c is 0), or 0
 *       when there is no such byte.
 *
 * Register roles:
 *   x0  running base pointer, advanced past each chunk examined
 *   x2  "advanced base" (chunk start + vector length) of the most
 *       recent chunk that contained c; 0 while none has been seen
 *   x3  start of the current chunk, kept across a partial advance
 *   z0  bytes loaded this round
 *   z1  c replicated into every byte lane
 *   p0  lanes actually loaded this round (read back from FFR)
 *   p1  all-true predicate, loop invariant
 *   p2  match predicate saved together with x2
 *   p3  compare result for the current chunk
 *
 * Also writes the condition flags and FFR.  The ENTRY, END and
 * PTR_ARG macros come from asmdefs.h.
 */

ENTRY (__strrchr_aarch64_sve)
	PTR_ARG (0)
	dup	z1.b, w1		/* replicate byte across vector */
	setffr				/* initialize FFR */
	ptrue	p1.b			/* all ones; loop invariant */
	mov	x2, 0			/* no match found so far */
	pfalse	p2.b			/* saved search starts out empty */

	.p2align 4
	/* Read a vector's worth of bytes, stopping on first fault.  */
0:	ldff1b	z0.b, p1/z, [x0, xzr]
	rdffrs	p0.b, p1/z		/* p0 = lanes loaded; sets flags */
	b.nlast	1f			/* last lane missing: partial load */

	/* First fault did not fail: the whole vector is valid.
	   Avoid depending on the contents of FFR beyond the branch.  */
	incb	x0, all			/* skip bytes this round */
	cmpeq	p3.b, p1/z, z0.b, 0	/* search for 0 */
	b.any	3f

	cmpeq	p3.b, p1/z, z0.b, z1.b	/* search for c; no eos */
	b.none	0b

	/* Remember this chunk; a later chunk with a match replaces it.  */
	mov	x2, x0			/* save advanced base */
	mov	p2.b, p3.b		/* save current search */
	b	0b

	/* First fault failed: only some of the vector is valid.
	   Perform the comparisons only on the valid bytes.  */
1:	cmpeq	p3.b, p0/z, z0.b, 0	/* search for 0 */
	b.any	2f

	/* The three instructions between this compare and b.none sit
	   between a flag-setting instruction and its branch: b.none
	   still tests the result of the compare.  */
	cmpeq	p3.b, p0/z, z0.b, z1.b	/* search for c; no eos */
	mov	x3, x0			/* keep start of this chunk */
	incp	x0, p0.b		/* skip only the bytes that were loaded */
	setffr				/* re-init FFR */
	b.none	0b

	/* Save start + vector length rather than the partially advanced
	   x0, so the saved base has the same form as on the full-vector
	   path and label 4 can handle both alike.  */
	addvl	x2, x3, 1		/* save advanced base */
	mov	p2.b, p3.b		/* save current search */
	b	0b

	/* Found end-of-string.  On entry to 3, x0 is the start of the
	   final chunk plus the vector length on both paths.  */
2:	incb	x0, all			/* advance base */
3:	brka	p3.b, p1/z, p3.b	/* lanes up to and including first 0 */
	cmpeq	p3.b, p3/z, z0.b, z1.b	/* search for c not after eos */
	b.any	4f

	/* No C within last vector.  Did we have one before?  */
	cbz	x2, 5f
	mov	x0, x2			/* restore advanced base */
	mov	p3.b, p2.b		/* restore saved search */

	/* Find the *last* match in the predicate.  This is slightly
	   more complicated than finding the first match: reverse the
	   lanes so the last match becomes the first, mark the lanes up
	   to and including it, and step the advanced base back by that
	   many bytes.  */
4:	rev	p3.b, p3.b		/* reverse the bits */
	brka	p3.b, p1/z, p3.b	/* find position of last match */
	decp	x0, p3.b		/* move pointer back to last match */
	ret

	/* No C whatsoever.  Return NULL.  */
5:	mov	x0, 0
	ret

END (__strrchr_aarch64_sve)

#endif
84*31914882SAlex Richardson
85