xref: /freebsd/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1/*
2 * strlen - calculate the length of a string
3 *
4 * Copyright (c) 2010-2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8#if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
9
10/*
11   Assumes:
12   ARMv6T2, AArch32
13
14 */
15
16#include "../asmdefs.h"
17
/* Endian-neutral shift mnemonics, selected on __ARMEB__ (defined for
   big-endian targets).  S2HI shifts the bytes of a loaded word toward
   higher memory addresses and S2LO toward lower ones: lsl/lsr on
   little-endian, the reverse on big-endian.  Only S2HI is used by the
   code visible in this file.  */
18#ifdef __ARMEB__
19#define S2LO		lsl
20#define S2HI		lsr
21#else
22#define S2LO		lsr
23#define S2HI		lsl
24#endif
25
26	/* This code requires Thumb.  */
27	.thumb
28	.syntax unified
29
30/* Parameters and result.  */
/* srcin and result are both r0, so the incoming pointer is no longer
   available once result has been written.  */
31#define srcin		r0
32#define result		r0
33
34/* Internal variables.  */
/* src      - read pointer, srcin rounded down to a multiple of 8.
   data1a/b - the two words fetched by each ldrd, then overwritten with
	      the per-byte NUL masks.
   const_m1 - 0xffffffff.
   const_0  - 0.
   tmp1/2   - scratch for the alignment handling on entry.
   r4 and r5 are saved on the stack by the function and restored before
   it returns.  */
35#define src		r1
36#define data1a		r2
37#define data1b		r3
38#define const_m1	r12
39#define const_0		r4
40#define tmp1		r4		/* Overlaps const_0; last read before const_0 is set.  */
41#define tmp2		r5
42
/* __strlen_armv6t2: return in result (r0) the number of bytes that
   precede the first NUL byte of the string at srcin (r0).

   The string is scanned in 8-byte groups loaded with ldrd from src,
   which is srcin rounded down to a multiple of 8.  The routine therefore
   loads bytes that lie before srcin and after the terminator, but only
   inside the aligned 8-byte groups holding the first byte and the
   terminator (the pld prefetch hints reach further ahead).

   NUL detection: uadd8 of a word with 0xff in every byte lane carries
   out of a lane, and so sets that lane's GE flag, exactly when the byte
   is non-zero.  sel then picks each result byte according to the GE
   flags, turning them into a byte mask.

   At every NUL test, result holds the offset from srcin of the group
   being tested (negative for the first group of a misaligned string).

   r4 and r5 are saved on entry and restored before returning; r1-r3,
   r12, the condition flags and the GE flags are overwritten.  */
43ENTRY (__strlen_armv6t2)
44	pld	[srcin, #0]
	/* Save r4/r5 below the incoming sp (pre-decrement by 8).  */
45	strd	r4, r5, [sp, #-8]!
46	bic	src, srcin, #7
47	mvn	const_m1, #0
48	ands	tmp1, srcin, #7		/* tmp1 = srcin & 7; Z set if 8-byte aligned.  */
49	pld	[src, #32]
	/* Branch on the flags from the ands above.  */
50	bne.w	L(misaligned8)
51	mov	const_0, #0
	/* Pre-bias by -8: the loop adds 8 before testing each group.  */
52	mov	result, #-8
53L(loop_aligned):
54	/* Bytes 0-7.  */
55	ldrd	data1a, data1b, [src]
56	pld	[src, #64]
57	add	result, result, #8
58L(start_realigned):
	/* NUL test for one 8-byte group; the same sequence is repeated for
	   each of the four groups handled per loop iteration.
	   After the first sel, data1a holds 0xff in every lane whose byte was
	   zero and 0x00 elsewhere.  After the second sel, each lane of data1b
	   holds 0xff if that byte of the second word was zero, otherwise the
	   corresponding lane of data1a.  Hence data1b != 0 exactly when the
	   group contains a NUL.  */
59	uadd8	data1a, data1a, const_m1	/* GE lane set iff byte != 0.  */
60	sel	data1a, const_0, const_m1	/* 0xff where byte == 0, else 0.  */
61	uadd8	data1b, data1b, const_m1
62	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
63	cbnz	data1b, L(null_found)
64
65	/* Bytes 8-15.  */
66	ldrd	data1a, data1b, [src, #8]
67	uadd8	data1a, data1a, const_m1	/* GE lane set iff byte != 0.  */
68	add	result, result, #8
69	sel	data1a, const_0, const_m1	/* 0xff where byte == 0, else 0.  */
70	uadd8	data1b, data1b, const_m1
71	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
72	cbnz	data1b, L(null_found)
73
74	/* Bytes 16-23.  */
75	ldrd	data1a, data1b, [src, #16]
76	uadd8	data1a, data1a, const_m1	/* GE lane set iff byte != 0.  */
77	add	result, result, #8
78	sel	data1a, const_0, const_m1	/* 0xff where byte == 0, else 0.  */
79	uadd8	data1b, data1b, const_m1
80	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
81	cbnz	data1b, L(null_found)
82
83	/* Bytes 24-31.  src is advanced here so that the next iteration
	   starts 32 bytes further on; src is not read at null_found.  */
84	ldrd	data1a, data1b, [src, #24]
85	add	src, src, #32
86	uadd8	data1a, data1a, const_m1	/* GE lane set iff byte != 0.  */
87	add	result, result, #8
88	sel	data1a, const_0, const_m1	/* 0xff where byte == 0, else 0.  */
89	uadd8	data1b, data1b, const_m1
90	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	/* No NUL in these 32 bytes: go round again.  Otherwise fall through.  */
91	cmp	data1b, #0
92	beq	L(loop_aligned)
93
94L(null_found):
	/* data1a == 0 means the first word held no NUL, so the terminator is
	   in the second word: step over 4 bytes and use data1b, which in that
	   case is the second word's own mask.  */
95	cmp	data1a, #0
96	itt	eq
97	addeq	result, result, #4
98	moveq	data1a, data1b
	/* On little-endian, byte-reverse so that the lowest-addressed byte is
	   the most significant; clz then yields 8 * (index of first NUL).  */
99#ifndef __ARMEB__
100	rev	data1a, data1a
101#endif
102	clz	data1a, data1a
	/* Restore r4/r5 and release the 8 bytes of stack.  */
103	ldrd	r4, r5, [sp], #8
104	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
105	bx	lr
106
107L(misaligned8):
	/* srcin is not 8-byte aligned and tmp1 = srcin & 7.  Load the
	   enclosing aligned group and force the bytes that precede srcin to
	   0xff so that they cannot be mistaken for the terminator.  result
	   starts at -tmp1, the offset of that group from srcin.  */
108	ldrd	data1a, data1b, [src]
109	and	tmp2, tmp1, #3
110	rsb	result, tmp1, #0
111	lsl	tmp2, tmp2, #3			/* Bytes -> bits.  */
112	tst	tmp1, #4
113	pld	[src, #64]
	/* tmp2 = all-ones with the (tmp1 & 3) lowest-addressed byte lanes
	   cleared; orn (OR with the complement) sets those lanes to 0xff.  */
114	S2HI	tmp2, const_m1, tmp2
115	orn	data1a, data1a, tmp2
	/* The flags are still those of the tst above.  If bit 2 of tmp1 is
	   set the string starts in the second word, so mask that word and
	   make the whole first word non-zero instead.  */
116	itt	ne
117	ornne	data1b, data1b, tmp2
118	movne	data1a, const_m1
	/* const_0 shares r4 with tmp1, so it is only set once tmp1 is dead.  */
119	mov	const_0, #0
120	b	L(start_realigned)
121
122END (__strlen_armv6t2)
123
124#endif /* __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2  */
125