xref: /freebsd/contrib/arm-optimized-routines/string/arm/strcpy.c (revision bc5304a006238115291e7568583632889dffbab9)
1 /*
2  * strcpy
3  *
4  * Copyright (c) 2008-2020, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #if defined (__thumb2__) && !defined (__thumb__)
9 
10 /* For GLIBC:
11 #include <string.h>
12 #include <memcopy.h>
13 
14 #undef strcpy
15 */
16 
/* magic1/magic2 expand to the operand text of the two constants used by the
   zero-byte test in the asm below: 0x01010101 and 0x80808080.  */
17 #ifdef __thumb2__
/* Thumb-2 build: both constants are written as immediates; REG is unused.  */
18 #define magic1(REG) "#0x01010101"
19 #define magic2(REG) "#0x80808080"
20 #else
/* Otherwise: REG names a register that the asm has loaded with 0x01010101;
   magic2 is that same register shifted left by 7, i.e. 0x80808080.  */
21 #define magic1(REG) #REG
22 #define magic2(REG) #REG ", lsl #7"
23 #endif
24 
/* __strcpy_arm (dst arrives in r0, src in r1): copy bytes from src to dst up
   to and including the first zero byte.  r0 is never written, so the caller
   gets dst back as the return value.  The function is declared naked, so the
   asm below is the whole body and does its own save/restore of r4 (and of r5
   in the non-Thumb-2 build).

   Register use: ip = store cursor, r1 = load cursor, r2 = scratch,
   r3/r4 = data words in flight, r5 = 0x01010101 (non-Thumb-2 build only).

   Paths: "4" is a plain byte loop, taken when dst and src differ in their
   low two address bits.  "3" copies a leading byte and/or halfword when both
   pointers share a non-zero offset from word alignment.  "5"/"2" copy whole
   words, using (w - 0x01010101) & ~w & 0x80808080 != 0 to spot a word that
   holds a zero byte.  The "1" that follows the word loop stores that last
   word one byte at a time.  */
25 char* __attribute__((naked))
26 __strcpy_arm (char* dst, const char* src)
27 {
28   __asm__ (
29        "pld	[r1, #0]\n\t"	/* Preload hint for the start of src.  */
30        "eor	r2, r0, r1\n\t"	/* Bits in which dst and src addresses differ.  */
31        "mov	ip, r0\n\t"	/* Write through ip so r0 keeps dst.  */
32        "tst	r2, #3\n\t"
33        "bne	4f\n\t"	/* Different offset within a word: byte copy.  */
34        "tst	r1, #3\n\t"
35        "bne	3f\n"	/* Same offset, but not word aligned yet.  */
      /* Word-at-a-time copy; r1 is word aligned from here on.  */
36   "5:\n\t"
37 # ifndef __thumb2__
      /* Save r5 and build 0x01010101 in it for magic1/magic2.  */
38        "str	r5, [sp, #-4]!\n\t"
39        "mov	r5, #0x01\n\t"
40        "orr	r5, r5, r5, lsl #8\n\t"
41        "orr	r5, r5, r5, lsl #16\n\t"
42 # endif
43 
44        "str	r4, [sp, #-4]!\n\t"	/* Save r4; it carries the second word in the loop.  */
45        "tst	r1, #4\n\t"	/* Is the word about to be loaded 8-byte aligned?  */
46        "ldr	r3, [r1], #4\n\t"	/* Load first word (does not touch the flags).  */
47        "beq	2f\n\t"	/* Yes: enter the two-word loop directly.  */
      /* No: handle this one word on its own first.  */
48        "sub	r2, r3, "magic1(r5)"\n\t"
49        "bics	r2, r2, r3\n\t"
50        "tst	r2, "magic2(r5)"\n\t"	/* ne <=> r3 contains a zero byte.  */
51        "itt	eq\n\t"
52        "streq	r3, [ip], #4\n\t"	/* No terminator: store the word ...  */
53        "ldreq	r3, [r1], #4\n"	/* ... and fetch the next one.  */
54        "bne	1f\n\t"	/* Terminator in r3: finish byte by byte.  */
55        /* Inner loop.  We now know that r1 is 64-bit aligned, so we
56 	  can safely fetch up to two words.  This allows us to avoid
57 	  load stalls.  */
58        ".p2align 2\n"
59   "2:\n\t"
60        "pld	[r1, #8]\n\t"
61        "ldr	r4, [r1], #4\n\t"	/* Second word, loaded before r3 is checked.  */
62        "sub	r2, r3, "magic1(r5)"\n\t"
63        "bics	r2, r2, r3\n\t"
64        "tst	r2, "magic2(r5)"\n\t"	/* ne <=> r3 contains a zero byte.  */
65        "sub	r2, r4, "magic1(r5)"\n\t"	/* Start the test of r4; sub leaves the flags alone.  */
66        "bne	1f\n\t"	/* Terminator is in r3.  */
67        "str	r3, [ip], #4\n\t"
68        "bics	r2, r2, r4\n\t"
69        "tst	r2, "magic2(r5)"\n\t"	/* ne <=> r4 contains a zero byte.  */
70        "itt	eq\n\t"
71        "ldreq	r3, [r1], #4\n\t"
72        "streq	r4, [ip], #4\n\t"
73        "beq	2b\n\t"
74        "mov	r3, r4\n"	/* Terminator is in r4: let the tail below handle it.  */
      /* Tail: store r3 one byte at a time, in memory order, until the zero
	 byte has been written.  */
75   "1:\n\t"
76 # ifdef __ARMEB__
      /* Big-endian: rotate the most significant byte down to bits 7:0.  */
77        "rors	r3, r3, #24\n\t"
78 # endif
79        "strb	r3, [ip], #1\n\t"
80        "tst	r3, #0xff\n\t"	/* Was the byte just stored the terminator?  */
81 # ifdef __ARMEL__
      /* Little-endian: bring the next byte down; ror keeps the tst flags.  */
82        "ror	r3, r3, #8\n\t"
83 # endif
84        "bne	1b\n\t"
85        "ldr	r4, [sp], #4\n\t"	/* Restore saved registers and return.  */
86 # ifndef __thumb2__
87        "ldr	r5, [sp], #4\n\t"
88 # endif
89        "BX LR\n"
90 
91        /* Strings have the same offset from word alignment, but it's
92 	  not zero.  */
93   "3:\n\t"
94        "tst	r1, #1\n\t"
95        "beq	1f\n\t"	/* Even address: no single byte to copy.  */
96        "ldrb	r2, [r1], #1\n\t"
97        "strb	r2, [ip], #1\n\t"
98        "cmp	r2, #0\n\t"
99        "it	eq\n"
100        "BXEQ LR\n"	/* That byte was the terminator; nothing was pushed yet.  */
101   "1:\n\t"
102        "tst	r1, #2\n\t"
103        "beq	5b\n\t"	/* Already word aligned: go to the word copy.  */
104        "ldrh	r2, [r1], #2\n\t"
105 # ifdef __ARMEB__
      /* Big-endian: the first byte in memory is bits 15:8 of r2.  */
106        "tst	r2, #0xff00\n\t"
107        "iteet	ne\n\t"
108        "strneh	r2, [ip], #2\n\t"	/* First byte non-zero: store both bytes.  */
109        "lsreq	r2, r2, #8\n\t"	/* First byte zero: move it to bits 7:0 ...  */
110        "streqb	r2, [ip]\n\t"	/* ... and store only the terminator.  */
111        "tstne	r2, #0xff\n\t"	/* Otherwise check the second byte.  */
112 # else
      /* Otherwise the first byte in memory is taken to be bits 7:0 of r2.  */
113        "tst	r2, #0xff\n\t"
114        "itet	ne\n\t"
115        "strneh	r2, [ip], #2\n\t"	/* First byte non-zero: store both bytes.  */
116        "streqb	r2, [ip]\n\t"	/* First byte zero: store only the terminator.  */
117        "tstne	r2, #0xff00\n\t"	/* Otherwise check the second byte.  */
118 # endif
119        "bne	5b\n\t"	/* Neither byte was zero: continue with the word copy.  */
120        "BX LR\n"	/* Terminator already stored; r4/r5 were never pushed here.  */
121 
122        /* src and dst do not have a common word-alignment.  Fall back to
123 	  byte copying.  */
124   "4:\n\t"
125        "ldrb	r2, [r1], #1\n\t"
126        "strb	r2, [ip], #1\n\t"
127        "cmp	r2, #0\n\t"
128        "bne	4b\n\t"	/* Loop until the zero byte has been copied.  */
129        "BX LR");
130 }
131 /* For GLIBC: libc_hidden_builtin_def (strcpy) */
132 
133 #endif /* defined (__thumb2__) && !defined (__thumb__)  */
134