/* xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/experimental/strcpy-sve.S (revision f3087bef11543b42e0d69b708f367097a4118d24) */
/*
 * strcpy/stpcpy - copy a string returning pointer to start/end.
 *
 * Copyright (c) 2018-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "asmdefs.h"

.arch armv8-a+sve

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * SVE Available.
 */

/* To build as stpcpy, define BUILD_STPCPY before compiling this file.  */
#ifdef BUILD_STPCPY
#define FUNC  __stpcpy_aarch64_sve
#else
#define FUNC  __strcpy_aarch64_sve
#endif

/* char *FUNC (char *dst, const char *src)
 *
 * Copy the NUL-terminated string at src, terminator included, to dst,
 * one SVE vector at a time.
 *
 * In:       x0 = dst, x1 = src
 * Out:      x0 = dst (strcpy build: x0 is never written), or
 *           x0 = address of the NUL byte stored in dst (BUILD_STPCPY)
 * Uses:     x2 = byte offset of the current vector, shared by src and dst
 *           z0 = bytes loaded from src
 *           p0 = bytes actually loaded (copy of FFR); final store mask
 *           p1 = positions of zero bytes within z0
 *           p2 = all-true predicate
 * Clobbers: x2, z0, p0-p2, FFR, condition flags
 *
 * The source is read with first-faulting loads (ldff1b).  Bytes from the
 * first unreadable one onwards are left unloaded and flagged in FFR rather
 * than trapping, so whole vectors can be read without knowing where the
 * string ends.
 */
ENTRY (FUNC)
	setffr				/* initialize FFR */
	ptrue	p2.b, all		/* all ones; loop invariant */
	mov	x2, 0			/* initialize offset */

	.p2align 4
	/* Read a vector's worth of bytes, stopping on first fault.  */
0:	ldff1b	z0.b, p2/z, [x1, x2]
	rdffrs	p0.b, p2/z		/* p0 = loaded bytes; sets flags */
	b.nlast	1f			/* last byte missing: partial load */

	/* First fault did not fail: the whole vector is valid.
	   Avoid depending on the contents of FFR beyond the branch.  */
	cmpeq	p1.b, p2/z, z0.b, 0	/* search for zeros */
	b.any	2f

	/* No zero found.  Store the whole vector and loop.  */
	st1b	z0.b, p2, [x0, x2]
	incb	x2, all			/* offset += vector length in bytes */
	b	0b

	/* First fault failed: only some of the vector is valid.
	   Perform the comparison only on the valid bytes.  */
1:	cmpeq	p1.b, p0/z, z0.b, 0	/* search for zeros */
	b.any	2f

	/* No zero found.  Store the valid portion of the vector and loop.
	   The next load then starts at the first byte that was not read.  */
	setffr				/* re-init FFR */
	st1b	z0.b, p0, [x0, x2]
	incp	x2, p0.b		/* offset += number of valid bytes */
	b	0b

	/* Zero found.  Crop the vector to the found zero and finish.
	   p1 only ever marks loaded bytes, so the cropped store mask never
	   reaches into the unloaded part of a partial vector.  */
2:	brka	p0.b, p2/z, p1.b	/* p0 = bytes up to and incl. NUL */
	st1b	z0.b, p0, [x0, x2]
#ifdef BUILD_STPCPY
	/* x0 = dst + offset + (bytes just stored) - 1 = address of the NUL.  */
	add	x0, x0, x2
	sub	x0, x0, 1
	incp	x0, p0.b		/* add count of bytes just stored */
#endif
	ret

END (FUNC)