/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5

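/*
 * On entry: r0 = destination, r1 = fill byte, r2 = byte count.  The
 * running store pointer is kept in ip so that r0 can be returned
 * unchanged, as memset() must hand back its first argument.
 */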
ENTRY(mmioset)
ENTRY(memset)
UNWIND( .fnstart         )
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
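/*
 * 7: is also the entry point used by __memset64 (and, via fall-through,
 * __memset32) at the end of this file; they arrive here with the fill
 * pattern already set up in r1/r3.
 */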
7:	cmp	r2, #16
	blt	4f

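/*
 * CALGN() (from <asm/assembler.h>) expands to its argument only on CPUs
 * that are configured to cache-line align the destination for bulk
 * writes; those builds use the aligning variant in the #else branch.
 */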
#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
UNWIND( .fnend              )
UNWIND( .fnstart            )
UNWIND( .save {r8, lr}      )
	mov	r8, r1
	mov	lr, r3

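/*
 * Main loop: four 16-byte stores write 64 bytes of the fill pattern per
 * iteration, with the fill word replicated across r1, r3, r8 and lr.
 */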
2:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	stmgeia	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmneia	ip!, {r1, r3, r8, lr}
	stmneia	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmneia	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend              )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
UNWIND( .fnend                 )
UNWIND( .fnstart               )
UNWIND( .save {r4-r8, lr}      )
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

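/*
 * Only take the cache line alignment path for runs longer than 96 bytes
 * when the destination is not already 32-byte aligned.
 */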
	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

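/*
 * r8 = bytes up to the next 32-byte boundary (a multiple of 4, since ip
 * is already word aligned).  Shifting it left by 28 moves bit 4 into C
 * and bit 3 into N, predicating the 16- and 8-byte stores below; the
 * remaining 4-byte case (bit 2) is picked out by testing bit 30 of the
 * shifted value.
 */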
	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmcsia	ip!, {r4, r5, r6, r7}
	stmmiia	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

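/*
 * 64 bytes per iteration, as two 8-register stores; when the destination
 * was aligned above, each store fills a whole 32-byte cache line.
 */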
3:	subs	r2, r2, #64
	stmgeia	ip!, {r1, r3-r8, lr}
	stmgeia	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmeqfd	sp!, {r4-r8, pc}

	tst	r2, #32
	stmneia	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmneia	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend                 )

#endif

UNWIND( .fnstart            )
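/*
 * Fewer than 16 bytes remain (only bits 0-3 of r2 are still meaningful):
 * store the 8- and 4-byte pieces, then fall into the byte tail below.
 */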
4:	tst	r2, #8
	stmneia	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strneb	r1, [ip], #1
	strneb	r1, [ip], #1
	tst	r2, #1
	strneb	r1, [ip], #1
	ret	lr

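/*
 * The destination is not word aligned: store the 1 to 3 leading bytes
 * (4 - r3 of them) by hand, fix up the count and restart at 1: with an
 * aligned pointer.  If fewer than 4 bytes were asked for, 5: above
 * finishes the job byte by byte.
 */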
6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strltb	r1, [ip], #1		@ 1
	strleb	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend   )
ENDPROC(memset)
ENDPROC(mmioset)

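/*
 * __memset32 and __memset64 fill with a 32-bit or 64-bit pattern rather
 * than a single byte.  The pattern words arrive in r1 (and r3 for the
 * 64-bit variant); __memset32 duplicates r1 into r3 and falls through,
 * and both then reuse the word-aligned body of memset via label 7.
 */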
ENTRY(__memset32)
UNWIND( .fnstart         )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend   )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart         )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend   )
ENDPROC(__memset64)
150