xref: /freebsd/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1/*
2 * strlen - calculate the length of a string
3 *
4 * Copyright (c) 2010-2022, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8#if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
9
10/*
11   Assumes:
12   ARMv6T2, AArch32
13
14 */
15
16#include "asmdefs.h"
17
/* Byte-order dependent shift mnemonics.  S2HI moves the contents of a
   loaded word towards its higher-addressed bytes (lsl on little-endian,
   lsr on big-endian); S2LO moves them towards the lower-addressed bytes.
   Only S2HI is used by the routine below.  */
18#ifdef __ARMEB__
19#define S2LO		lsl
20#define S2HI		lsr
21#else
22#define S2LO		lsr
23#define S2HI		lsl
24#endif
25
26/* Ensure the .cantunwind directive is prepended to .fnend.
27   Leaf functions cannot throw exceptions - EHABI only supports
28   synchronous exceptions.  */
/* NOTE(review): IS_LEAF is not referenced in this file; presumably it is
   consumed by the macros from asmdefs.h - confirm there.  */
29#define IS_LEAF
30
31	/* This code requires Thumb.  */
32	.thumb
33	.syntax unified
34
35/* Parameters and result.  srcin and result share r0, so the incoming
   pointer is lost as soon as result is first written; the routine works
   from src (r1) after that.  */
36#define srcin		r0
37#define result		r0
38
39/* Internal variables.  */
40#define src		r1		/* 8-byte-aligned load cursor.  */
41#define data1a		r2		/* First word of the current doubleword, then its NUL mask.  */
42#define data1b		r3		/* Second word of the current doubleword, then its NUL mask.  */
43#define const_m1	r12		/* Holds 0xffffffff.  */
44#define const_0		r4		/* Holds 0.  */
45#define tmp1		r4		/* Overlaps const_0: only live until const_0 is zeroed.  */
46#define tmp2		r5		/* Scratch for the misaligned-start padding mask.  */
47
/* __strlen_armv6t2: compute the length of the NUL-terminated string whose
   address is passed in r0 (srcin) and leave the byte count in r0 (result).

   Method: the string is examined eight bytes at a time with LDRD from
   addresses that are multiples of eight (src is srcin rounded down to an
   8-byte boundary and every load offset is a multiple of eight).  For each
   loaded word, UADD8 with 0xffffffff sets GE[i] exactly when byte i is
   non-zero (the add carries out of the lane), and SEL then builds a mask
   that holds 0xff in every lane that contained a NUL and 0x00 elsewhere.
   The aligned loop is unrolled four times, i.e. 32 bytes per iteration.

   Invariant: whenever a doubleword is tested, result holds the offset of
   that doubleword relative to srcin (negative or zero for the first one
   when srcin is not 8-byte aligned).

   Registers written: r0-r3, r12, and r4-r5 (named in the prologue and
   epilogue macro arguments); the APSR condition flags and GE bits are
   overwritten.  */
48ENTRY (__strlen_armv6t2)
	/* prologue/epilogue are macros from asmdefs.h.  Presumably they save
	   and restore r4-r5 (plus ip when HAVE_PAC_LEAF is set) and the
	   epilogue returns to the caller - confirm against asmdefs.h.  */
49	prologue 4 5 push_ip=HAVE_PAC_LEAF
50	pld	[srcin, #0]
51	bic	src, srcin, #7		/* src = srcin rounded down to 8 bytes.  */
52	mvn	const_m1, #0		/* const_m1 = 0xffffffff.  */
53	ands	tmp1, srcin, #7		/* tmp1 = offset of srcin inside its 8-byte block; Z set if aligned.  */
54	pld	[src, #32]
55	bne.w	L(misaligned8)		/* Uses the flags from the ands above.  */
56	mov	const_0, #0		/* tmp1 (same register) is no longer needed.  */
57	mov	result, #-8		/* Pre-biased: the loop adds 8 before the first test.  */
58L(loop_aligned):
59	/* Bytes 0-7 of the current 32-byte block.  */
60	ldrd	data1a, data1b, [src]
61	pld	[src, #64]
62	add	result, result, #8
	/* L(misaligned8) joins here with data1a/data1b already loaded and
	   padded and result = -tmp1, skipping the load and the add above.  */
63L(start_realigned):
64	uadd8	data1a, data1a, const_m1	/* GE[i] = (byte i != 0); modular add, not saturating.  */
65	sel	data1a, const_0, const_m1	/* 0xff in lanes that held a NUL, 0x00 elsewhere.  */
66	uadd8	data1b, data1b, const_m1	/* GE[i] = (byte i of second word != 0).  */
67	sel	data1b, data1a, const_m1	/* Non-zero iff either word has a NUL; exact mask only if d1a == 0.  */
68	cbnz	data1b, L(null_found)
69
70	/* Bytes 8-15.  */
71	ldrd	data1a, data1b, [src, #8]
72	uadd8	data1a, data1a, const_m1	/* GE[i] = (byte i != 0).  */
73	add	result, result, #8
74	sel	data1a, const_0, const_m1	/* NUL mask of the first word.  */
75	uadd8	data1b, data1b, const_m1
76	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
77	cbnz	data1b, L(null_found)
78
79	/* Bytes 16-23.  */
80	ldrd	data1a, data1b, [src, #16]
81	uadd8	data1a, data1a, const_m1	/* GE[i] = (byte i != 0).  */
82	add	result, result, #8
83	sel	data1a, const_0, const_m1	/* NUL mask of the first word.  */
84	uadd8	data1b, data1b, const_m1
85	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
86	cbnz	data1b, L(null_found)
87
88	/* Bytes 24-31.  */
89	ldrd	data1a, data1b, [src, #24]
90	add	src, src, #32		/* Advance to the next 32-byte block for the loop back.  */
91	uadd8	data1a, data1a, const_m1	/* GE[i] = (byte i != 0).  */
92	add	result, result, #8
93	sel	data1a, const_0, const_m1	/* NUL mask of the first word.  */
94	uadd8	data1b, data1b, const_m1
95	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
96	cmp	data1b, #0
97	beq	L(loop_aligned)		/* No NUL in these 32 bytes; otherwise fall through.  */
98
	/* On entry: result = offset from srcin of the doubleword that holds
	   the NUL; data1a = NUL mask of its first word; data1b is non-zero
	   and is the NUL mask of its second word whenever data1a == 0.  */
99L(null_found):
100	.cfi_remember_state		/* Saved so the code after the epilogue can reinstate it.  */
101	cmp	data1a, #0		/* No NUL in the first word?  */
102	itt	eq
103	addeq	result, result, #4	/* Then skip the first word...  */
104	moveq	data1a, data1b		/* ...and use the second word's mask.  */
105#ifndef __ARMEB__
106	rev	data1a, data1a		/* Move the lowest-addressed byte to the top for clz.  */
107#endif
108	clz	data1a, data1a		/* 8 * (number of bytes before the first NUL).  */
109	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
110	epilogue 4 5 push_ip=HAVE_PAC_LEAF
111
	/* srcin is not 8-byte aligned.  Load the enclosing aligned doubleword
	   and force the tmp1 bytes that precede srcin to 0xff so that they
	   cannot be mistaken for a NUL, then join the main loop.  */
112L(misaligned8):
113	.cfi_restore_state		/* Back to the state recorded before the epilogue.  */
114	ldrd	data1a, data1b, [src]
115	and	tmp2, tmp1, #3		/* Bytes to pad within one word.  */
116	rsb	result, tmp1, #0	/* result = -tmp1: offset of this doubleword from srcin.  */
117	lsl	tmp2, tmp2, #3			/* Bytes -> bits.  */
118	tst	tmp1, #4		/* NE if srcin lies in the second word.  */
119	pld	[src, #64]
120	S2HI	tmp2, const_m1, tmp2	/* All-ones with the (tmp1 & 3) lowest-addressed lanes cleared.  */
121	orn	data1a, data1a, tmp2	/* data1a |= ~tmp2: pad those lanes with 0xff.  */
122	itt	ne			/* Flags are still those of the tst above.  */
123	ornne	data1b, data1b, tmp2	/* Start is in the second word: pad it instead...  */
124	movne	data1a, const_m1	/* ...and make the whole first word non-NUL.  */
125	mov	const_0, #0		/* Overwrites tmp1, which is dead from here on.  */
126	b	L(start_realigned)
127
128END (__strlen_armv6t2)
129
130#endif /* __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2  */
131