/* xref: /freebsd/contrib/arm-optimized-routines/string/aarch64/strnlen.S (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b) */
/*
 * strnlen - calculate the length of a string with limit.
 *
 * Copyright (c) 2020-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Argument and return registers.  result shares x0 with srcin, so srcin
   must not be read after result has been written.  */
#define srcin		x0
#define cntin		x1
#define result		x0

/* General-purpose scratch registers.
   src    - address of the 16-byte chunk currently being examined.
   synd   - 64-bit syndrome: one nibble per byte of the current chunk.
   shift  - bit offset used to drop syndrome nibbles of bytes before srcin.
   tmp    - temporary; deliberately shares x4 with shift, which is only
	    used while handling the first chunk.
   cntrem - remaining byte budget while iterating over later chunks.  */
#define src		x2
#define synd		x3
#define	shift		x4
#define tmp		x4
#define cntrem		x5

/* SIMD registers.  qdata/vdata are two views of register 0, and vend/dend
   are two views of register 2 (dend being its low 64 bits).  */
#define qdata		q0
#define vdata		v0
#define vhas_chr	v1
#define vend		v2
#define dend		d2

/*
   Core algorithm:
   Process the string in 16-byte aligned chunks.  For each chunk, compute a
   64-bit mask with four bits per byte using the shrn instruction.  A count
   of trailing zeros (rbit followed by clz) then identifies the first zero
   byte, and the resulting length is clamped to the limit.  */

/*
 * __strnlen_aarch64 - length of a string, limited to a maximum count.
 *
 * In:   x0 (srcin) = address of the first byte of the string.
 *       x1 (cntin) = maximum number of bytes to examine.
 * Out:  x0 (result) = index of the first NUL byte, or cntin when no NUL
 *       byte is found within the first cntin bytes.
 * Uses: x2-x5, v0-v2 and the condition flags.  No stack accesses and no
 *       calls are made.
 *
 * All loads are 16 bytes wide from 16-byte aligned addresses.  The first
 * load may include bytes before srcin; their syndrome nibbles are shifted
 * out before the syndrome is tested.
 */
ENTRY (__strnlen_aarch64)
	/* PTR_ARG and SIZE_ARG are defined in asmdefs.h (not visible here);
	   presumably they normalise the pointer and size arguments on
	   ILP32 builds - confirm against that header.  */
	PTR_ARG (0)
	SIZE_ARG (1)
	/* src = srcin rounded down to a 16-byte boundary.  */
	bic	src, srcin, 15
	/* A zero limit returns 0 without reading any memory.  */
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	/* vhas_chr: 0xff in every byte lane that holds a NUL, else 0x00.  */
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* shift = 4 * srcin.  The register-controlled lsr below uses the
	   shift amount modulo 64, i.e. 4 bits for each byte that lies
	   between the aligned address and srcin.  */
	lsl	shift, srcin, 2
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	fmov	synd, dend
	/* Discard the nibbles that belong to bytes before srcin.  */
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)
	/* NUL found in the first chunk.  L(finish) is not a branch target in
	   this function; it only names this path.  The rbit is unconditional
	   here because ld1 with byte elements puts the lowest-addressed byte
	   in lane 0 on either endianness.  */
L(finish):
	rbit	synd, synd
	clz	synd, synd
	/* Four syndrome bits per byte: bit index / 4 = byte index.  */
	lsr	result, synd, 2
	/* result = min (cntin, result), unsigned.  */
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

	/* No NUL byte within the limit: the answer is the limit itself.  */
L(nomatch):
	mov	result, cntin
	ret

L(start_loop):
	/* src - srcin = -(srcin & 15), so tmp = (bytes already checked in
	   the first chunk) + 1.  After the subs, cntrem + 1 is the number
	   of bytes that still have to be examined.  A borrow means the
	   limit was already reached inside the first chunk.  */
	sub	tmp, src, srcin
	add	tmp, tmp, 17
	subs	cntrem, cntin, tmp
	b.lo	L(nomatch)

	/* The loop examines 32 bytes per iteration in two 16-byte halves.
	   Bit 4 of cntrem selects the half the loop is entered at, so that
	   the loop does not overread by a 16-byte chunk.  When entering at
	   L(loop32), src is first moved back by 16 so that the pre-indexed
	   load there fetches the chunk directly after the first one.  */
	tbz	cntrem, 4, L(loop32_2)
	sub	src, src, 16
	.p2align 5
L(loop32):
	/* First half: advance src by 32 and load the chunk it now addresses.  */
	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* The pairwise maximum only answers "is any byte NUL?"; the exact
	   position is recomputed from vhas_chr at L(end).  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	/* Second half: the chunk at src + 16, without updating src.  */
	ldr	qdata, [src, 16]
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Budget exhausted: this is the last chunk to examine, NUL or not.  */
	b.lo	L(end_2)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end_2):
	/* Make src address the chunk that was examined last.  */
	add	src, src, 16
L(end):
	/* src addresses the final chunk and vhas_chr holds its NUL mask.  */
	shrn	vend.8b, vhas_chr.8h, 4		/* 128->64 */
	sub	result, src, srcin
	fmov	synd, dend
	/* The loop loads with ldr q, so on big-endian targets the
	   lowest-addressed byte maps to the most significant syndrome nibble
	   and clz can be applied without reversing the bits first.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* A syndrome of zero gives clz = 64, i.e. an offset of 16 past the
	   chunk start; the clamp below then yields cntin.  */
	clz	synd, synd
	add	result, result, synd, lsr 2
	/* result = min (cntin, result), unsigned.  */
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

END (__strnlen_aarch64)
