xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/experimental/strcmp-sve.S (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
/*
 * __strcmp_aarch64_sve - compare two strings
 *
 * Copyright (c) 2018-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7
8#include "asmdefs.h"
9
10.arch armv8-a+sve
11
/* Assumptions:
 *
 * ARMv8-A, AArch64.
 * SVE is available.
 */
17
ENTRY (__strcmp_aarch64_sve)
	/* Register roles in this routine:
	     x0, x1  the two string pointers on entry; just before the
	             return they are reused for the pair of bytes at
	             which the comparison stopped
	     x2      byte offset added to both pointers
	     z0, z1  bytes loaded from [x0 + x2] and [x1 + x2]
	     p1      all-true byte predicate, never modified
	     p0      copy of the FFR taken after each pair of loads
	     p2, p3  scratch predicates for the comparison
	   Result: x0 = (byte from first string) - (byte from second
	   string), taken at the first position where the strings differ
	   or the first string ends.  */
	ptrue	p1.b			/* every byte lane active */
	mov	x2, xzr			/* offset = 0 */
	setffr				/* FFR must start out all-true */

	/* Main loop: first-faulting loads of one vector from each
	   string.  The FFR read sets the flags that tell us whether
	   every lane was actually loaded.  */
	.p2align 4
.Lnext_vector:
	ldff1b	z0.b, p1/z, [x0, x2]
	ldff1b	z1.b, p1/z, [x1, x2]
	rdffrs	p0.b, p1/z
	b.nlast	.Lpartial_vector	/* some lanes were not loaded */

	/* Full vector loaded.  The FFR is known to be all-true here, so
	   p1 is used as the governing predicate and p0 is not needed.
	   The stop condition is ~(equal & nonzero), i.e. a lane where
	   the bytes differ or the first string has its NUL.  NANDS must
	   be the last flag-setting instruction before the branch.  */
	cmpne	p3.b, p1/z, z0.b, #0	/* lanes where first string != NUL */
	cmpeq	p2.b, p1/z, z0.b, z1.b	/* lanes where both strings agree */
	incb	x2			/* offset += vector length in bytes */
	nands	p2.b, p1/z, p2.b, p3.b	/* p2 = differ | NUL */
	b.none	.Lnext_vector		/* no stop lane: keep going */

	/* p2 marks every stop lane.  BRKB keeps only the lanes before
	   the first one, so LASTA (element after the last active lane)
	   picks exactly the first stop lane from each vector.  */
.Lstop_found:
	brkb	p2.b, p1/z, p2.b
	lasta	w1, p2, z1.b		/* byte from second string */
	lasta	w0, p2, z0.b		/* byte from first string */
	sub	x0, x0, x1		/* difference is the return value */
	ret

	/* Only a prefix of the vector was loaded; p0 (the saved FFR)
	   marks the valid lanes.  Same comparison as above, but governed
	   by p0 so that unloaded lanes can never be reported.  */
.Lpartial_vector:
	setffr				/* restore FFR before the next loads */
	incp	x2, p0.b		/* offset += number of valid lanes */
	cmpne	p3.b, p0/z, z0.b, #0
	cmpeq	p2.b, p0/z, z0.b, z1.b
	nands	p2.b, p0/z, p2.b, p3.b	/* p2 = valid & (differ | NUL) */
	b.any	.Lstop_found		/* a stop lane exists: finish up */
	b	.Lnext_vector		/* otherwise retry from new offset */

END (__strcmp_aarch64_sve)
56