/* xref: /freebsd/contrib/cortex-strings/src/thumb-2/strlen.S (revision f126d349810fdb512c0b01e101342d430b947488) */
/* Copyright (c) 2010-2011,2013 Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

      * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

      * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

      * Neither the name of Linaro Limited nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
31
/*
   Assumes:
   ARMv6T2, AArch32

 */
37
/* def_fn f [p2align=N]

   Open the definition of a global function named \f:
     - switch to the .text section,
     - align the entry point to 2^p2align bytes (default 0, i.e. no
       additional alignment),
     - export the symbol and give it the "function" symbol type,
     - emit the entry label itself.

   The macro does not emit a .size directive; the user of the macro is
   expected to place one after the function body.  */
	.macro def_fn f p2align=0
	.text
	.p2align \p2align
	.global \f
	.type \f, %function
\f:
	.endm
45
/* Endian-neutral shift mnemonics.

   S2LO shifts register contents towards the byte held at the lower
   memory address of a loaded word, S2HI towards the byte held at the
   higher address.  On big-endian targets (__ARMEB__ defined) the lowest
   address is the most significant byte, so the directions are swapped
   relative to little-endian.  */
#ifdef __ARMEB__
#define S2LO		lsl
#define S2HI		lsr
#else
#define S2LO		lsr
#define S2HI		lsl
#endif
53
54	/* This code requires Thumb.  */
55	.thumb
56	.syntax unified
57
/* Parameters and result.  The argument and the return value share r0.  */
#define srcin		r0		/* In:  address of the string.  */
#define result		r0		/* Out: running / final length.  */

/* Internal variables.  */
#define src		r1		/* 8-byte aligned read pointer.  */
#define data1a		r2		/* Word at the lower address of each 8-byte block.  */
#define data1b		r3		/* Word at the higher address of each 8-byte block.  */
#define const_m1	r12		/* All ones (0xffffffff).  */
#define const_0		r4		/* Zero, once the entry code is done with tmp1.  */
#define tmp1		r4		/* Overlaps const_0  */
#define tmp2		r5
70
/* strlen

   In:   r0 (srcin)  = address of a NUL-terminated string.
   Out:  r0 (result) = number of bytes that precede the first NUL.
   Uses: r1, r2, r3 and r12 as scratch.  r4 and r5 are pushed on entry and
	 popped again before returning.  The condition flags and the GE
	 bits are modified.

   Method: the string is read with LDRD in 8-byte blocks starting from
   srcin rounded down to a multiple of 8.  For an unaligned srcin the
   bytes of the first block that lie before the string are forced to
   0xff so they cannot be mistaken for a terminator.

   NUL detection: UADD8 with 0xff in every byte lane produces a carry
   out of exactly those lanes whose data byte is non-zero and records it
   in the matching GE bit.  SEL then builds a marker word holding 0x00 in
   lanes whose GE bit is set and 0xff in lanes whose GE bit is clear
   (the NUL bytes).  For the second word of a block SEL takes the first
   word's marker lanes instead of 0x00, so the second marker word is
   non-zero whenever either word of the block contains a NUL and a
   single test per block is enough.

   The main loop is unrolled four times (32 bytes per iteration).  */
def_fn	strlen p2align=6
	pld	[srcin, #0]
	strd	r4, r5, [sp, #-8]!		/* Save r4/r5 before using them.  */
	bic	src, srcin, #7			/* src = srcin rounded down to 8.  */
	mvn	const_m1, #0			/* const_m1 = 0xffffffff.  */
	ands	tmp1, srcin, #7		/* Offset of srcin inside its 8-byte block.  */
	pld	[src, #32]
	bne.w	.Lmisaligned8			/* Non-zero offset: mask leading bytes.  */
	mov	const_0, #0
	mov	result, #-8			/* Biased: 8 is added before each test.  */
.Lloop_aligned:
	/* Bytes 0-7.  */
	ldrd	data1a, data1b, [src]
	pld	[src, #64]
	add	result, result, #8
.Lstart_realigned:
	/* Entry point for the misaligned case; result already holds minus
	   the number of masked leading bytes.  */
	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
	sel	data1a, const_0, const_m1	/* 0x00 if GE set, 0xff for a NUL.  */
	uadd8	data1b, data1b, const_m1
	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	cbnz	data1b, .Lnull_found

	/* Bytes 8-15.  */
	ldrd	data1a, data1b, [src, #8]
	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
	add	result, result, #8
	sel	data1a, const_0, const_m1	/* 0x00 if GE set, 0xff for a NUL.  */
	uadd8	data1b, data1b, const_m1
	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	cbnz	data1b, .Lnull_found

	/* Bytes 16-23.  */
	ldrd	data1a, data1b, [src, #16]
	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
	add	result, result, #8
	sel	data1a, const_0, const_m1	/* 0x00 if GE set, 0xff for a NUL.  */
	uadd8	data1b, data1b, const_m1
	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	cbnz	data1b, .Lnull_found

	/* Bytes 24-31.  */
	ldrd	data1a, data1b, [src, #24]
	add	src, src, #32			/* Advance to the next 32-byte group.  */
	uadd8	data1a, data1a, const_m1	/* GE<0:3> set for non-zero bytes.  */
	add	result, result, #8
	sel	data1a, const_0, const_m1	/* 0x00 if GE set, 0xff for a NUL.  */
	uadd8	data1b, data1b, const_m1
	sel	data1b, data1a, const_m1	/* Only used if d1a == 0.  */
	cmp	data1b, #0
	beq	.Lloop_aligned

.Lnull_found:
	/* result is the offset of the current block from the start of the
	   string.  data1a is the marker word for the first four bytes;
	   when it is zero the NUL is in the second word, whose marker is
	   data1b.  */
	cmp	data1a, #0
	itt	eq
	addeq	result, result, #4
	moveq	data1a, data1b
#ifndef __ARMEB__
	/* Little-endian: move the lowest-addressed byte to the top so that
	   CLZ finds the first NUL in memory order.  */
	rev	data1a, data1a
#endif
	clz	data1a, data1a			/* Bit index of the first 0xff marker.  */
	ldrd	r4, r5, [sp], #8		/* Restore r4/r5.  */
	add	result, result, data1a, lsr #3	/* Bits -> Bytes.  */
	bx	lr

.Lmisaligned8:
	/* srcin is not 8-byte aligned; tmp1 holds its offset (1-7) inside
	   the block at src.  Load the whole block and force the bytes in
	   front of the string to 0xff.  */
	ldrd	data1a, data1b, [src]
	and	tmp2, tmp1, #3			/* Leading bytes within one word.  */
	rsb	result, tmp1, #0		/* result = -offset.  */
	lsl	tmp2, tmp2, #3			/* Bytes -> bits.  */
	tst	tmp1, #4			/* NE: string starts in the second word.  */
	pld	[src, #64]
	S2HI	tmp2, const_m1, tmp2		/* Ones over the bytes to keep.  */
	orn	data1a, data1a, tmp2		/* Set the leading bytes to 0xff.  */
	itt	ne
	ornne	data1b, data1b, tmp2		/* Mask applies to the second word...  */
	movne	data1a, const_m1		/* ...and the first word is all padding.  */
	mov	const_0, #0			/* tmp1 is dead; r4 becomes const_0.  */
	b	.Lstart_realigned
	.size	strlen, . - strlen
150
151