xref: /freebsd/contrib/cortex-strings/src/thumb-2/strcpy.c (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 /*
2  * Copyright (c) 2008 ARM Ltd
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the company may not be used to endorse or promote
14  *    products derived from this software without specific prior written
15  *    permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /* For GLIBC:
30 #include <string.h>
31 #include <memcopy.h>
32 
33 #undef strcpy
34 */
35 
/* Operand text for the two constants used by strcpy's "does this word
   contain a zero byte?" test.  Both macros expand to string literals
   that are pasted straight into the asm template.

   Thumb-2 build: the constants are written as immediates and REG is
   ignored (presumably because Thumb-2 can encode these repeated-byte
   immediates directly -- confirm against the architecture manual).

   Other builds: magic1 expands to the stringified register name and
   magic2 to that register shifted left by 7.  strcpy loads 0x01010101
   into r5 before using them, so "r5, lsl #7" evaluates to 0x80808080.  */
36 #ifdef __thumb2__
37 #define magic1(REG) "#0x01010101"
38 #define magic2(REG) "#0x80808080"
39 #else
40 #define magic1(REG) #REG
41 #define magic2(REG) #REG ", lsl #7"
42 #endif
43 
/* strcpy -- copy the NUL-terminated string at SRC to DST, terminator
   included.

   The function is `naked' and consists solely of one asm statement, so
   there is no compiler-generated prologue or epilogue: the code reads
   DST from r0 and SRC from r1 and returns with an explicit BX LR.  r0 is
   never written, so it still holds DST on return.

   One of three bodies is selected at compile time:
     - a word-at-a-time copy, used unless __OPTIMIZE_SIZE__ or
       PREFER_SIZE_OVER_SPEED is defined or the target is Thumb without
       Thumb-2;
     - a byte loop using post-indexed loads/stores (ARM or Thumb-2);
     - a byte loop using separate pointer increments (the remaining
       case, i.e. Thumb without Thumb-2).

   Register use in the word-at-a-time body:
     r1      source cursor
     ip      destination cursor (copy of r0)
     r2      scratch / zero-byte test result
     r3, r4  data words in flight (r4 is saved on the stack)
     r5      0x01010101, non-Thumb-2 builds only (saved on the stack)  */
44 char* __attribute__((naked))
45 strcpy (char* dst, const char* src)
46 {
47   asm (
48 #if !(defined(__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
49       (defined (__thumb__) && !defined (__thumb2__)))
       /* Entry: prefetch the source, then classify alignment.
	  r2 = dst ^ src; if its low two bits are non-zero the pointers
	  can never be word-aligned together -> byte copy at 4.
	  Otherwise, if src itself is not word-aligned -> fix-up at 3.  */
50        "pld	[r1, #0]\n\t"
51        "eor	r2, r0, r1\n\t"
52        "mov	ip, r0\n\t"
53        "tst	r2, #3\n\t"
54        "bne	4f\n\t"
55        "tst	r1, #3\n\t"
56        "bne	3f\n"
       /* 5: both pointers are word-aligned from here on.  */
57   "5:\n\t"
58 #ifndef __thumb2__
       /* Save r5 and build 0x01010101 in it (0x01 -> 0x0101 ->
	  0x01010101) for use by magic1/magic2.  */
59        "str	r5, [sp, #-4]!\n\t"
60        "mov	r5, #0x01\n\t"
61        "orr	r5, r5, r5, lsl #8\n\t"
62        "orr	r5, r5, r5, lsl #16\n\t"
63 #endif
64 
       /* Save r4, test bit 2 of src and load the first word.  If bit 2
	  was clear, go straight to the two-word loop at 2 with that word
	  in r3.  */
65        "str	r4, [sp, #-4]!\n\t"
66        "tst	r1, #4\n\t"
67        "ldr	r3, [r1], #4\n\t"
68        "beq	2f\n\t"
       /* Zero-byte test on r3:
	    (r3 - 0x01010101) & ~r3 & 0x80808080
	  is non-zero exactly when some byte of r3 is zero.  If no byte
	  is zero, store the word and load the next one; otherwise finish
	  byte-wise at 1.  */
69        "sub	r2, r3, "magic1(r5)"\n\t"
70        "bics	r2, r2, r3\n\t"
71        "tst	r2, "magic2(r5)"\n\t"
72        "itt	eq\n\t"
73        "streq	r3, [ip], #4\n\t"
74        "ldreq	r3, [r1], #4\n"
75        "bne	1f\n\t"
76        /* Inner loop.  We now know that r1 is 64-bit aligned, so we
77 	  can safely fetch up to two words.  This allows us to avoid
78 	  load stalls.  */
79        ".p2align 2\n"
       /* On entry to 2, r3 holds a word that has been loaded but not yet
	  tested or stored.  Each iteration loads the following word into
	  r4, tests r3 (exit to 1 if it contains a zero byte), stores r3,
	  then tests r4.  If r4 is clean, the next word is loaded into r3,
	  r4 is stored and the loop repeats; otherwise r4 is moved to r3
	  and control falls into the byte-wise tail.  */
80   "2:\n\t"
81        "pld	[r1, #8]\n\t"
82        "ldr	r4, [r1], #4\n\t"
83        "sub	r2, r3, "magic1(r5)"\n\t"
84        "bics	r2, r2, r3\n\t"
85        "tst	r2, "magic2(r5)"\n\t"
86        "sub	r2, r4, "magic1(r5)"\n\t"
87        "bne	1f\n\t"
88        "str	r3, [ip], #4\n\t"
89        "bics	r2, r2, r4\n\t"
90        "tst	r2, "magic2(r5)"\n\t"
91        "itt	eq\n\t"
92        "ldreq	r3, [r1], #4\n\t"
93        "streq	r4, [ip], #4\n\t"
94        "beq	2b\n\t"
95        "mov	r3, r4\n"
       /* 1: tail.  r3 contains the terminating zero byte; store its
	  bytes one at a time in memory order until the zero has been
	  written.  Big-endian rotates before the store so the next byte
	  in memory order sits in the low 8 bits; little-endian rotates
	  after the test, using the non-flag-setting `ror' so that `bne'
	  still sees the result of the `tst' on the byte just stored.
	  NOTE(review): r3 is only advanced when __ARMEB__ or __ARMEL__
	  is defined; assumes the compiler always defines one of them.  */
96   "1:\n\t"
97 #ifdef __ARMEB__
98        "rors	r3, r3, #24\n\t"
99 #endif
100        "strb	r3, [ip], #1\n\t"
101        "tst	r3, #0xff\n\t"
102 #ifdef __ARMEL__
103        "ror	r3, r3, #8\n\t"
104 #endif
105        "bne	1b\n\t"
       /* Restore the saved registers in reverse order of the saves and
	  return.  */
106        "ldr	r4, [sp], #4\n\t"
107 #ifndef __thumb2__
108        "ldr	r5, [sp], #4\n\t"
109 #endif
110        "BX LR\n"
111 
112        /* Strings have the same offset from word alignment, but it's
113 	  not zero.  */
       /* If src is odd, copy one byte; return at once if that byte was
	  the terminator (nothing has been pushed on the stack yet).  */
114   "3:\n\t"
115        "tst	r1, #1\n\t"
116        "beq	1f\n\t"
117        "ldrb	r2, [r1], #1\n\t"
118        "strb	r2, [ip], #1\n\t"
119        "cmp	r2, #0\n\t"
120        "it	eq\n"
121        "BXEQ LR\n"
       /* src is now halfword-aligned.  If bit 1 is clear it is already
	  word-aligned: join the word path at 5.  Otherwise load one
	  halfword.  If its first byte in memory order is non-zero the
	  whole halfword is stored and the second byte is tested; if the
	  first byte is zero only that single zero byte is stored.  The
	  final `bne' continues at 5 only when neither byte was zero.
	  NOTE(review): `strneh'/`streqb' are pre-UAL spellings (UAL
	  writes strhne/strbeq) -- confirm the target assembler accepts
	  them.  */
122   "1:\n\t"
123        "tst	r1, #2\n\t"
124        "beq	5b\n\t"
125        "ldrh	r2, [r1], #2\n\t"
126 #ifdef __ARMEB__
127        "tst	r2, #0xff00\n\t"
128        "iteet	ne\n\t"
129        "strneh	r2, [ip], #2\n\t"
130        "lsreq	r2, r2, #8\n\t"
131        "streqb	r2, [ip]\n\t"
132        "tstne	r2, #0xff\n\t"
133 #else
134        "tst	r2, #0xff\n\t"
135        "itet	ne\n\t"
136        "strneh	r2, [ip], #2\n\t"
137        "streqb	r2, [ip]\n\t"
138        "tstne	r2, #0xff00\n\t"
139 #endif
140        "bne	5b\n\t"
141        "BX LR\n"
142 
143        /* src and dst do not have a common word-alignment.  Fall back to
144 	  byte copying.  */
145   "4:\n\t"
146        "ldrb	r2, [r1], #1\n\t"
147        "strb	r2, [ip], #1\n\t"
148        "cmp	r2, #0\n\t"
149        "bne	4b\n\t"
150        "BX LR"
151 
152 #elif !defined (__thumb__) || defined (__thumb2__)
       /* Size-optimised ARM / Thumb-2 variant: plain byte loop with
	  post-indexed addressing; r3 is the destination cursor so r0 is
	  preserved.  */
153        "mov	r3, r0\n\t"
154   "1:\n\t"
155        "ldrb	r2, [r1], #1\n\t"
156        "strb	r2, [r3], #1\n\t"
157        "cmp	r2, #0\n\t"
158        "bne	1b\n\t"
159        "BX LR"
160 #else
       /* Remaining case (Thumb without Thumb-2): the same byte loop,
	  with the pointer increments done by separate add
	  instructions.  */
161        "mov	r3, r0\n\t"
162   "1:\n\t"
163        "ldrb	r2, [r1]\n\t"
164        "add	r1, r1, #1\n\t"
165        "strb	r2, [r3]\n\t"
166        "add	r3, r3, #1\n\t"
167        "cmp	r2, #0\n\t"
168        "bne	1b\n\t"
169        "BX LR"
170 #endif
171        );
172 }
173 /* For GLIBC: libc_hidden_builtin_def (strcpy) */
174