xref: /freebsd/contrib/cortex-strings/src/arm/memset.S (revision 5bf5ca772c6de2d53344a78cf461447cc322ccea)
1/* Copyright (c) 2010-2011, Linaro Limited
2   All rights reserved.
3
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7
8      * Redistributions of source code must retain the above copyright
9      notice, this list of conditions and the following disclaimer.
10
11      * Redistributions in binary form must reproduce the above copyright
12      notice, this list of conditions and the following disclaimer in the
13      documentation and/or other materials provided with the distribution.
14
15      * Neither the name of Linaro Limited nor the names of its
16      contributors may be used to endorse or promote products derived
17      from this software without specific prior written permission.
18
19   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/*
33   Written by Dave Gilbert <david.gilbert@linaro.org>
34
35   This memset routine is optimised on a Cortex-A9 and should work on
36   all ARMv7 processors.
37
38 */
39
40	.syntax unified
41	.arch armv7-a
42
43@ 2011-08-30 david.gilbert@linaro.org
44@    Extracted from local git 2f11b436
45
46@ this lets us check a flag in a 00/ff byte easily in either endianness
47#ifdef __ARMEB__
48#define CHARTSTMASK(c) 1<<(31-(c*8))
49#else
50#define CHARTSTMASK(c) 1<<(c*8)
51#endif
52	.text
53	.thumb
54
55@ ---------------------------------------------------------------------------
@ void *memset(void *s, int c, size_t n)
@
@ Fill n bytes starting at s with the low byte of c and return s.
@
@ In:    r0 = destination address
@        r1 = fill character (only the low 8 bits are ever stored)
@        r2 = byte count
@ Out:   r0 = original destination address (never modified)
@ Uses:  r1, r2, r3 and the flags as scratch.  r4-r7 are pushed and popped
@        only around the 16-byte block loop, so calls that never reach
@        that loop do not touch the stack.
@
@ Strategy: byte stores until the cursor is 8-byte aligned, then 16-byte
@ stmia blocks, then one optional 8-byte stmia, then trailing byte stores.
	.p2align 4,,15			@ 16-byte align the entry point
	.global	memset
	.type	memset, %function
	.thumb_func
memset:
	mov	r3, r0			@ r3 = write cursor; r0 is left alone for the return
	cbz	r2, .Lmemset_done	@ n == 0: nothing to store

	tst	r0, #7
	beq	.Lmemset_aligned	@ destination already 8-byte aligned

	@ Misaligned head: store one byte at a time until the cursor reaches
	@ an 8-byte boundary or the count runs out, whichever comes first.
.Lmemset_head:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7			@ replaces the flags from subs; cbz does not read flags
	cbz	r2, .Lmemset_done	@ count exhausted
	bne	.Lmemset_head		@ (flags from tst) still misaligned

.Lmemset_aligned:
	@ Here r3 is 8-byte aligned and r2 >= 1: both ways of arriving have
	@ already branched to the exit for a zero count.
	cmp	r2, #16
	blo	.Lmemset_tail		@ 1..15 bytes left: byte loop only, no registers to save

	push	{r4, r5, r6, r7}
	bic	r4, r2, #15		@ r4 = bytes covered by whole 16-byte blocks (non-zero)

	@ POSIX says that ch is cast to an unsigned char.  The original
	@ author's note: a uxtb has the shorter (two-byte) encoding but takes
	@ two cycles, where an AND is four bytes but one cycle.
	and	r1, r1, #0xFF
	orr	r1, r1, r1, lsl #8	@ same character into all four bytes
	orr	r1, r1, r1, lsl #16
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1

.Lmemset_block16:
	subs	r4, r4, #16
	stmia	r3!, {r1, r5, r6, r7}	@ stmia leaves the flags from subs intact
	bne	.Lmemset_block16
	and	r2, r2, #15		@ r2 = bytes left after the 16-byte blocks

	@ Still aligned, with up to 15 bytes left to write.  Take one 8-byte
	@ chunk if there is room, to shorten the byte-at-a-time tail.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1, r5}

	pop	{r4, r5, r6, r7}
	cbz	r2, .Lmemset_done

	@ Trailing bytes; r2 >= 1 on entry.  strb stores only the low byte of
	@ r1, so it does not matter whether r1 was replicated above.
.Lmemset_tail:
	subs	r2, r2, #1
	strb	r1, [r3], #1		@ strb leaves the flags from subs intact
	bne	.Lmemset_tail

.Lmemset_done:
	bx	lr			@ r0 still holds the original address
	.size	memset, . - memset
123