xref: /freebsd/contrib/arm-optimized-routines/string/arm/memset.S (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1/*
2 * memset - fill memory with a constant
3 *
4 * Copyright (c) 2010-2021, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8/*
9   Written by Dave Gilbert <david.gilbert@linaro.org>
10
11   This memset routine is optimised on a Cortex-A9 and should work on
12   all ARMv7 processors.
13
14 */
15
16	.syntax unified
17	.arch armv7-a
18
19@ 2011-08-30 david.gilbert@linaro.org
20@    Extracted from local git 2f11b436
21
@ CHARTSTMASK(c) yields a one-bit mask that falls inside byte number c
@ (counted in memory order) of a 32-bit word, so a byte known to be either
@ 0x00 or 0xff can be checked with a single tst in either endianness.
@ The argument and the whole expansion are parenthesised so the macro
@ stays correct when given an expression or used inside a larger one.
#ifdef __ARMEB__
#define CHARTSTMASK(c) (1 << (31 - ((c) * 8)))
#else
#define CHARTSTMASK(c) (1 << ((c) * 8))
#endif
28	.thumb
29
@ ---------------------------------------------------------------------------
@ __memset_arm - fill a region of memory with one byte value (Thumb-2).
@
@ In:    r0 = destination address
@        r1 = fill value (only the low 8 bits end up in memory)
@        r2 = number of bytes to fill
@ Out:   r0 = destination address, unchanged
@ Uses:  r1, r2, r3 and the condition flags are overwritten.
@        r4-r7 are pushed and popped around the aligned phase, so they
@        hold their entry values again on return.
@
@ Plan:  1. byte stores until the write cursor is 8-byte aligned
@        2. 16-byte store-multiple loop
@        3. one optional 8-byte store-multiple
@        4. byte stores for whatever is still left
	.thumb_func
	.align 2
	.p2align 4,,15
	.global __memset_arm
	.type __memset_arm,%function
__memset_arm:
	mov	r3, r0			@ r3 = write cursor; r0 is kept for the return value
	cbz	r2, .Lmemset_done	@ zero length: nothing to store

	tst	r0, #7
	beq	.Lmemset_aligned	@ start address is already 8-byte aligned

	@ Phase 1: single-byte stores up to the next 8-byte boundary.
.Lmemset_head:
	strb	r1, [r3], #1
	subs	r2, r2, #1
	tst	r3, #7			@ Z is set once the cursor is 8-byte aligned
	cbz	r2, .Lmemset_done	@ count used up; cbz does not touch the flags
	bne	.Lmemset_head		@ decided by the tst above: still misaligned

.Lmemset_aligned:
	@ The cursor is 8-byte aligned and r2 is non-zero from here on.
	push	{r4, r5, r6, r7}
	bics	r4, r2, #15		@ r4 = count with the low four bits cleared
	beq	.Lmemset_restore	@ fewer than 16 bytes: go straight to the byte tail

	@ The fill value is stored as an unsigned char, so keep only the low
	@ byte and copy it into all four byte lanes.  The original author
	@ chose AND over UXTB here, trading a longer encoding for one cycle
	@ less on the core this was tuned for.
	and	r1, r1, #0xFF
	orr	r1, r1, r1, lsl #8	@ pattern now fills the low halfword
	orr	r1, r1, r1, lsl #16	@ pattern now fills the whole word
	mov	r7, r1
	mov	r6, r1
	mov	r5, r1

	@ Phase 2: 16 bytes per iteration; r4 is a non-zero multiple of 16.
.Lmemset_block16:
	subs	r4, r4, #16
	stmia	r3!, {r1, r5, r6, r7}	@ does not alter the flags set by subs
	bne	.Lmemset_block16
	and	r2, r2, #15		@ r2 = bytes remaining, 0..15

	@ Phase 3: the cursor is still aligned, so a remaining chunk of eight
	@ bytes can go out as one two-register store.
	tst	r2, #8
	itt	ne
	subne	r2, r2, #8
	stmiane	r3!, {r1, r5}

.Lmemset_restore:
	pop	{r4, r5, r6, r7}
	cbz	r2, .Lmemset_done

	@ Phase 4: finish the remaining bytes one at a time.  strb writes only
	@ the low byte of r1, so this works whether or not r1 was replicated.
.Lmemset_tail:
	subs	r2, r2, #1
	strb	r1, [r3], #1		@ flags from subs survive the store
	bne	.Lmemset_tail

.Lmemset_done:
	bx	lr			@ r0 still holds the original destination
	.size	__memset_arm, . - __memset_arm
99