xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S (revision 31914882fca502069810b9e9ddea4bcd8136a4f4)
1*31914882SAlex Richardson/*
2*31914882SAlex Richardson * __strlen_aarch64_sve - compute the length of a string
3*31914882SAlex Richardson *
4*31914882SAlex Richardson * Copyright (c) 2018-2021, Arm Limited.
5*31914882SAlex Richardson * SPDX-License-Identifier: MIT
6*31914882SAlex Richardson */
7*31914882SAlex Richardson
8*31914882SAlex Richardson#include "../asmdefs.h"
9*31914882SAlex Richardson
10*31914882SAlex Richardson#if __ARM_FEATURE_SVE
11*31914882SAlex Richardson/* Assumptions:
12*31914882SAlex Richardson *
13*31914882SAlex Richardson * ARMv8-a, AArch64
14*31914882SAlex Richardson * SVE Available.
15*31914882SAlex Richardson */
16*31914882SAlex Richardson
/*
 * __strlen_aarch64_sve
 *
 * In:    x0 = base address of the byte string (used as the load base below).
 * Out:   x0 = number of bytes preceding the first zero byte.
 * Uses:  x1 (running length), z0 (loaded bytes), p0/p1/p2 (predicates),
 *        FFR, and the condition flags.  The visible instructions do not
 *        reference sp.
 *
 * Local labels:
 *   0: main loop, one first-fault vector load per iteration
 *   1: a zero byte was found; finish the count and return
 *   2: the load stopped early; handle the partially valid vector
 *
 * NOTE(review): every line in this listing starts with an
 * "<n>*31914882SAlex Richardson" annotation that looks like blame/xref
 * residue rather than assembler input -- confirm and strip it before
 * assembling.
 */
17*31914882SAlex RichardsonENTRY (__strlen_aarch64_sve)
18*31914882SAlex Richardson	PTR_ARG (0)		/* NOTE(review): macro presumably from ../asmdefs.h, applied to argument 0 (x0); likely pointer normalization -- confirm */
19*31914882SAlex Richardson	setffr			/* initialize FFR */
20*31914882SAlex Richardson	ptrue	p2.b		/* all ones; loop invariant */
21*31914882SAlex Richardson	mov	x1, 0		/* initialize length */
22*31914882SAlex Richardson
23*31914882SAlex Richardson	/* Read a vector's worth of bytes, stopping on first fault.  */
	/* x1 is both the running length and the byte offset from x0.
	   A fault on any lane but the first is recorded in FFR instead of
	   trapping, so reading past the terminator into an unmapped page
	   is harmless.  */
24*31914882SAlex Richardson	.p2align 4
25*31914882SAlex Richardson0:	ldff1b	z0.b, p2/z, [x0, x1]	/* z0 = bytes at x0 + x1 (first-fault load) */
26*31914882SAlex Richardson	rdffrs	p0.b, p2/z		/* p0 = lanes that were loaded; sets flags from p0 */
27*31914882SAlex Richardson	b.nlast	2f			/* last lane not loaded => partial vector */
28*31914882SAlex Richardson
29*31914882SAlex Richardson	/* First fault did not fail: the whole vector is valid.
30*31914882SAlex Richardson	   Avoid depending on the contents of FFR beyond the branch.  */
	/* Hence the all-true p2, not p0, governs the compare below.  */
31*31914882SAlex Richardson	incb	x1, all			/* speculate increment */
32*31914882SAlex Richardson	cmpeq	p1.b, p2/z, z0.b, 0	/* loop if no zeros */
33*31914882SAlex Richardson	b.none	0b
34*31914882SAlex Richardson	decb	x1, all			/* undo speculate */
35*31914882SAlex Richardson
36*31914882SAlex Richardson	/* Zero found.  Select the bytes before the first and count them.  */
	/* Reached by fall-through (full vector) and from 2: (partial
	   vector); in both cases p1 marks the zero bytes and x1 is the
	   length counted before this vector.  */
37*31914882SAlex Richardson1:	brkb	p0.b, p2/z, p1.b	/* p0 = lanes strictly before the first zero */
38*31914882SAlex Richardson	incp	x1, p0.b		/* x1 += number of true lanes in p0 */
39*31914882SAlex Richardson	mov	x0, x1			/* return the length in x0 */
40*31914882SAlex Richardson	ret
41*31914882SAlex Richardson
42*31914882SAlex Richardson	/* First fault failed: only some of the vector is valid.
43*31914882SAlex Richardson	   Perform the comparison only on the valid bytes.  */
	/* p0 still holds the FFR value read by rdffrs, i.e. the valid lanes.  */
44*31914882SAlex Richardson2:	cmpeq	p1.b, p0/z, z0.b, 0	/* p1 = valid lanes holding a zero byte */
45*31914882SAlex Richardson	b.any	1b			/* zero among the valid bytes => finish at 1: */
46*31914882SAlex Richardson
47*31914882SAlex Richardson	/* No zero found.  Re-init FFR, increment, and loop.  */
	/* The next ldff1b then starts at the first lane that was not
	   loaded; as the first active lane it faults normally if the
	   address really is inaccessible.  */
48*31914882SAlex Richardson	setffr				/* FFR must be all-true again before the next ldff1b */
49*31914882SAlex Richardson	incp	x1, p0.b		/* x1 += number of valid bytes just scanned */
50*31914882SAlex Richardson	b	0b
51*31914882SAlex Richardson
52*31914882SAlex RichardsonEND (__strlen_aarch64_sve)
53*31914882SAlex Richardson
54*31914882SAlex Richardson#endif
55*31914882SAlex Richardson
56