xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1/*
2 * strchr - find a character in a string
3 *
4 * Copyright (c) 2020-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8/* Assumptions:
9 *
10 * ARMv8-a, AArch64, Advanced SIMD.
11 * MTE compatible.
12 */
13
14#include "asmdefs.h"
15
16#define srcin		x0	/* in: start of the string */
17#define chrin		w1	/* in: character to find */
18#define result		x0	/* out: shares x0 with srcin */
19
20#define src		x2	/* 16-byte aligned chunk pointer */
21#define tmp1		x1	/* syndrome, then bit index; overlaps chrin (w1) */
22#define tmp2		x3	/* shift count used for the first chunk */
23
24#define vrepchr		v0	/* chrin replicated to every byte lane */
25#define vdata		v1	/* current 16-byte chunk */
26#define qdata		q1	/* same register as vdata, named for ldr */
27#define vhas_nul	v2	/* per-byte NUL mask, later merged with vhas_chr */
28#define vhas_chr	v3	/* per-byte match mask */
29#define vrepmask	v4	/* 0x33 in every byte */
30#define vend		v5	/* narrowed 64-bit syndrome */
31#define dend		d5	/* low 64 bits of vend, named for fmov */
32
33/* Core algorithm.
34
35   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
36   per byte. Bits 0-1 are set if the relevant byte matched the requested
37   character, bits 2-3 are set if the byte is NUL or matched. The count of
38   trailing zeroes is four times the index of the first matching byte if it
39   is a multiple of 4. If it is not a multiple of 4, NUL came first: no match.  */
40
41ENTRY (__strchr_aarch64_mte)	/* In: x0 = string, w1 = character.  Out: x0 = first match, or 0.  */
42	bic	src, srcin, 15	/* src = srcin rounded down to a 16-byte boundary */
43	dup	vrepchr.16b, chrin	/* replicate the character into all 16 lanes */
44	ld1	{vdata.16b}, [src]	/* load the aligned chunk that contains srcin */
45	movi	vrepmask.16b, 0x33	/* mask selecting bits 0-1 and 4-5 of each byte */
46	cmeq	vhas_nul.16b, vdata.16b, 0	/* 0xff in lanes that hold NUL */
47	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b	/* 0xff in lanes that match */
48	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b	/* masked bits <- match, other bits keep NUL */
49	lsl	tmp2, srcin, 2	/* 4 * srcin; lsr below uses only the low 6 bits, i.e. 4 * (srcin & 15) */
50	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64: bits 4-11 of each halfword, one nibble per byte */
51	fmov	tmp1, dend	/* move the syndrome to a general register */
52	lsr	tmp1, tmp1, tmp2	/* drop the nibbles of bytes that precede srcin */
53	cbz	tmp1, L(loop)	/* neither match nor NUL in the first chunk */
54
55	rbit	tmp1, tmp1	/* rbit + clz = count trailing zeroes */
56	clz	tmp1, tmp1
57	/* Tmp1 is a multiple of 4 if the target character was found
58	   first. Otherwise bit 1 is set and we've found the end of string.  */
59	tst	tmp1, 2	/* eq: match came first; ne: NUL came first */
60	add	result, srcin, tmp1, lsr 2	/* srcin + byte index (tmp1 / 4) */
61	csel	result, result, xzr, eq	/* return the address on a match, else 0 */
62	ret
63
64	.p2align 4
65L(loop):	/* unrolled twice: tests the chunks at src + 16 and src + 32 */
66	ldr	qdata, [src, 16]	/* next chunk; src is not updated here */
67	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b	/* 0xff in lanes that match */
68	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b	/* vhas_chr >= data (unsigned): lane matched or is NUL */
69	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* pairwise max: low 64 bits are nonzero iff any lane is set */
70	fmov	tmp1, dend
71	cbnz	tmp1, L(end)	/* hit in the chunk at src + 16 */
72	ldr	qdata, [src, 32]!	/* pre-index: src += 32, then load from the new src */
73	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b	/* 0xff in lanes that match */
74	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b	/* lane matched or is NUL */
75	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* nonzero iff any lane is set */
76	fmov	tmp1, dend
77	cbz	tmp1, L(loop)	/* nothing yet: continue with the next pair */
78	sub	src, src, 16	/* hit is at src; rebase so both exits see it at src + 16 */
79L(end):	/* vhas_chr / vhas_nul describe the chunk at src + 16 */
80
81#ifdef __AARCH64EB__	/* big-endian: no rbit, the clz below counts from the top bit */
82	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b	/* insert match bits where the mask is clear */
83	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64: one nibble per byte */
84	fmov	tmp1, dend
85#else	/* little-endian: rbit turns the clz below into a trailing-zero count */
86	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b	/* bits 0-1/4-5 <- match, bits 2-3/6-7 stay match-or-NUL */
87	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64: one nibble per byte */
88	fmov	tmp1, dend
89	rbit	tmp1, tmp1
90#endif
91	add	src, src, 16	/* src = address of the chunk that produced the hit */
92	clz	tmp1, tmp1	/* bit index of the first set syndrome bit */
93	/* Tmp1 is a multiple of 4 if the target character was found.  */
94	tst	tmp1, 2	/* eq: match came first; ne: NUL came first */
95	add	result, src, tmp1, lsr 2	/* chunk address + byte index (tmp1 / 4) */
96	csel	result, result, xzr, eq	/* return the address on a match, else 0 */
97	ret
98
99END (__strchr_aarch64_mte)
100
101