xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/strnlen.S (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
/*
 * strnlen - calculate the length of a string with limit.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

#define srcin		x0
#define cntin		x1
#define result		x0

#define src		x2
#define synd		x3
#define shift		x4
#define tmp		x4
#define cntrem		x5

#define qdata		q0
#define vdata		v0
#define vhas_chr	v1
#define vend		v2
#define dend		d2

/*
   Core algorithm:
   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
   four bits per byte using the shrn instruction. Counting the trailing
   zeros of that mask then identifies the first zero byte.  */

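/* For illustration only (not part of this file or its build): a rough C
   sketch of the same syndrome technique for a single 16-byte chunk, using
   Arm Neon intrinsics and GCC/Clang's __builtin_ctzll, and assuming a
   little-endian target.  The helper name chunk_first_nul is invented for
   the sketch.

	#include <arm_neon.h>
	#include <stddef.h>
	#include <stdint.h>

	// Index of the first NUL in the 16-byte chunk at p, or 16 if none.
	// p must be 16-byte aligned, as in the assembly below.
	static inline size_t chunk_first_nul (const uint8_t *p)
	{
		uint8x16_t data = vld1q_u8 (p);
		uint8x16_t cmp = vceqzq_u8 (data);	// 0xff where the byte is 0
		// Narrow each 16-bit lane by 4: four syndrome bits per byte.
		uint8x8_t nib = vshrn_n_u16 (vreinterpretq_u16_u8 (cmp), 4);
		uint64_t synd = vget_lane_u64 (vreinterpret_u64_u8 (nib), 0);
		if (synd == 0)
			return 16;		// no zero byte in this chunk
		return (size_t) __builtin_ctzll (synd) >> 2;	// bit index / 4
	}

   The umaxp in the loop below performs the same 128->64 reduction but only
   as a cheap nonzero test; the exact index is recomputed with shrn at
   L(end) once a zero byte (or the limit) has been reached.  */
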
ENTRY (__strnlen_aarch64)
	bic	src, srcin, 15		/* Align src down to 16 bytes.  */
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	cmeq	vhas_chr.16b, vdata.16b, 0
	lsl	shift, srcin, 2		/* 4 syndrome bits per byte; lsr uses shift & 63.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift	/* Drop bits of bytes before srcin.  */
	cbz	synd, L(start_loop)
L(finish):
	rbit	synd, synd
	clz	synd, synd		/* rbit + clz = count trailing zeros.  */
	lsr	result, synd, 2		/* Bit index / 4 = byte index.  */
	cmp	cntin, result
	csel	result, cntin, result, ls	/* Clamp to the limit.  */
	ret

L(nomatch):
	mov	result, cntin
	ret

L(start_loop):
	sub	tmp, src, srcin
	add	tmp, tmp, 17		/* Bytes covered by the first chunk, plus 1.  */
	subs	cntrem, cntin, tmp
	b.lo	L(nomatch)		/* Limit ends within the first chunk.  */

	/* Make sure the loop will not over-read by a 16-byte chunk.  */
	tbz	cntrem, 4, L(loop32_2)
	sub	src, src, 16
	.p2align 5
L(loop32):
	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	ldr	qdata, [src, 16]
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0
	b.lo	L(end_2)		/* Limit reached; the final csel clamps the result.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end_2):
	add	src, src, 16
L(end):
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	sub	result, src, srcin	/* Offset of the chunk just loaded.  */
	fmov	synd, dend
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	synd, synd
	add	result, result, synd, lsr 2	/* Plus the byte index within the chunk.  */
	cmp	cntin, result
	csel	result, cntin, result, ls	/* Clamp to the limit.  */
	ret

END (__strnlen_aarch64)