xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/experimental/strlen-sve.S (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1/*
2 * __strlen_aarch64_sve - compute the length of a string
3 *
4 * Copyright (c) 2018-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8#include "asmdefs.h"
9
10.arch armv8-a+sve
11
12/* Assumptions:
13 *
14 * ARMv8-a, AArch64
15 * SVE Available.
16 */
17
ENTRY (__strlen_aarch64_sve)
	/* Register roles:
	     x0  in: pointer to the string; out: its length
	     x1  running byte count, also the load offset from x0
	     z0  bytes fetched by the current load
	     p0  snapshot of FFR, later the mask of bytes before the NUL
	     p1  positions in z0 that compare equal to zero
	     p2  all-true predicate, never modified after setup  */
	setffr				/* start with FFR all-true */
	ptrue	p2.b
	mov	x1, #0

	/* Main loop: first-fault load of one vector from x0 + x1, then
	   read FFR (setting the flags) to learn how much was loaded.  */
	.p2align 4
.Lnext_vector:
	ldff1b	z0.b, p2/z, [x0, x1]
	rdffrs	p0.b, p2/z
	b.nlast	.Lpartial_vector

	/* The last element loaded, so the full vector is usable and
	   FFR need not be consulted again on this path.  The count is
	   advanced before the compare so that the common "no NUL here"
	   case branches straight back to the load.  */
	incb	x1
	cmpeq	p1.b, p2/z, z0.b, #0
	b.none	.Lnext_vector
	decb	x1			/* NUL is in this vector: take it back */

	/* p1 marks at least one zero byte.  Build a mask of the elements
	   that precede the first one and add their number to the count.  */
.Lfound_nul:
	brkb	p0.b, p2/z, p1.b
	incp	x1, p0.b
	mov	x0, x1
	ret

	/* The load stopped early: p0 holds the elements that were really
	   read, so look for a zero byte among those only.  */
.Lpartial_vector:
	cmpeq	p1.b, p0/z, z0.b, #0
	b.any	.Lfound_nul

	/* None of the valid bytes is zero.  Reset FFR for the next
	   first-fault load, count the valid bytes, and try again.  */
	setffr
	incp	x1, p0.b
	b	.Lnext_vector

END (__strlen_aarch64_sve)
53