xref: /linux/arch/sh/lib/movmem.S (revision efdbd7345f8836f7495f3ac6ee237d86cb3bb6b0)
1/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2   2004, 2005, 2006
3   Free Software Foundation, Inc.
4
5This file is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 2, or (at your option) any
8later version.
9
10In addition to the permissions in the GNU General Public License, the
11Free Software Foundation gives you unlimited permission to link the
12compiled version of this file into combinations with other programs,
13and to distribute those combinations without any restriction coming
14from the use of this file.  (The General Public License restrictions
15do apply in other respects; for example, they cover modification of
16the file, and distribution when not linked into a combine
17executable.)
18
19This file is distributed in the hope that it will be useful, but
20WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22General Public License for more details.
23
24You should have received a copy of the GNU General Public License
25along with this program; see the file COPYING.  If not, write to
26the Free Software Foundation, 51 Franklin Street, Fifth Floor,
27Boston, MA 02110-1301, USA.  */
28
29!! libgcc routines for the Renesas / SuperH SH CPUs.
30!! Contributed by Steve Chamberlain.
31!! sac@cygnus.com
32
33!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
34!! recoded in assembly by Toshiyasu Morita
35!! tm@netcom.com
36
37/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
38   ELF local label prefixes by J"orn Rennecke
39   amylaar@cygnus.com  */
40
41	.text
42	.balign	4
43	.global	__movmem
44	.global __movstr
45	.set __movstr, __movmem
/*
 * __movmem (alias __movstr): block copy in 4-byte (mov.l) units.
 *
 * In:    r4 = destination (every store in this routine goes through r4)
 *        r5 = source      (every load in this routine goes through r5)
 *        r6 = size control word.  It is multiplied by 4 on entry and then
 *             reduced by 64 for each 64-byte chunk copied.
 *             NOTE(review): the exact encoding of r6 is chosen by the
 *             caller (presumably compiler-generated code) and is not
 *             visible in this file -- confirm against the caller.
 * Clobb: r0, r4, r5, r6, T bit.  PR is pushed on the r15 stack on entry
 *        and popped again on both exit paths.
 *
 * Shape of the code:
 *   - Offsets 0..48 of each 64-byte chunk are copied by the __movmemSI52
 *     ladder.  The ladder ends in rts, and PR still holds the return
 *     address set up by the bsr below, so it "returns" to movmem_loop.
 *   - movmem_loop copies the remaining offsets 60, 56 and 52 of the chunk
 *     and subtracts 64 from r6.
 *   - r6 == 0: finish the chunk at movmem_done and return to the caller.
 *   - r6 >  0: advance r4/r5 by 64 and branch back into the ladder.
 *   - r6 <  0: advance r4/r5 by 64, then jump into the ladder at the rung
 *     that copies the remaining -r6 bytes; the ladder's rts returns
 *     straight to the caller because PR has been restored by then.
 */
46	/* This would be a lot simpler if r6 contained the byte count
47	   minus 64, and we wouldn't be called here for a byte count of 64.  */
48__movmem:
	/* Save the caller's return address; bsr below overwrites PR. */
49	sts.l	pr,@-r15
	/* r6 *= 4 */
50	shll2	r6
	/* Enter the ladder one instruction past __movmemSI52: the skipped
	   instruction (load of @(48,r5)) is executed here in the bsr delay
	   slot instead. */
51	bsr	__movmemSI52+2
52	mov.l	@(48,r5),r0
53	.balign	4
54movmem_loop: /* Reached with rts */
55	mov.l	@(60,r5),r0
	/* One more 64-byte chunk accounted for. */
56	add	#-64,r6
57	mov.l	r0,@(60,r4)
	/* T = (r6 == 0) */
58	tst	r6,r6
59	mov.l	@(56,r5),r0
	/* Exactly done: movmem_done performs the pending stores for offsets
	   56 and 52 and returns.  bt has no delay slot. */
60	bt	movmem_done
61	mov.l	r0,@(56,r4)
	/* T = (r6 > 0); none of the mov.l/add instructions before the bt
	   below modify T. */
62	cmp/pl	r6
63	mov.l	@(52,r5),r0
	/* r0 already holds the word from old r5+52, so r5 can be advanced
	   before that word is stored through the not-yet-advanced r4. */
64	add	#64,r5
65	mov.l	r0,@(52,r4)
66	add	#64,r4
	/* More than a chunk's worth left: copy offsets 0..48 of the next
	   chunk; the ladder's rts comes back to movmem_loop. */
67	bt	__movmemSI52
68! done all the large groups, do the remainder
69! jump to movmem+
	/* r6 is negative here.  Each ladder rung is 4 bytes of code and copies
	   4 bytes of data, and __movmemSI4+4 is the address just past the
	   start of the last rung, so (__movmemSI4+4) + r6 is the entry point
	   __movmemSI<n> with n = -r6.  r4/r5 were already advanced above. */
70	mova	__movmemSI4+4,r0
71	add	r6,r0
	/* The delay slot of this jmp is the lds.l at movmem_done, which
	   restores PR so that the ladder's rts returns to our caller. */
72	jmp	@r0
73movmem_done: ! share slot insn, works out aligned.
74	lds.l	@r15+,pr
	/* Only reached by the bt above from here on: store the word loaded
	   from @(56,r5), then copy offset 52, the last word of the chunk. */
75	mov.l	r0,@(56,r4)
76	mov.l	@(52,r5),r0
77	rts
	/* rts delay slot: final store. */
78	mov.l	r0,@(52,r4)
	/* Keeps the ladder that follows starting on a 4-byte boundary. */
79	.balign	4
80
81	.global	__movmemSI64
82	.global __movstrSI64
83	.set	__movstrSI64, __movmemSI64
/*
 * __movmemSI64 ... __movmemSI4 (aliases __movstrSI64 ... __movstrSI4):
 * fall-through ladder of fixed-size copies.
 *
 * Entering at __movmemSI<n> copies n bytes from @r5 to @r4, one longword
 * at a time from offset n-4 down to offset 0, then returns with rts (the
 * final store sits in the rts delay slot).
 *
 * In:    r4 = destination, r5 = source; neither is modified here.
 * Clobb: r0.
 *
 * Layout contract: every rung is exactly one load followed by one store.
 * __movmem depends on that layout -- it enters at __movmemSI52+2 and
 * computes a tail entry point backwards from __movmemSI4+4 -- so do not
 * insert, remove or reorder instructions here without updating __movmem.
 */
84__movmemSI64:
85	mov.l	@(60,r5),r0
86	mov.l	r0,@(60,r4)
87	.global	__movmemSI60
88	.global __movstrSI60
89	.set	__movstrSI60, __movmemSI60
90__movmemSI60:
91	mov.l	@(56,r5),r0
92	mov.l	r0,@(56,r4)
93	.global	__movmemSI56
94	.global __movstrSI56
95	.set	__movstrSI56, __movmemSI56
96__movmemSI56:
97	mov.l	@(52,r5),r0
98	mov.l	r0,@(52,r4)
99	.global	__movmemSI52
100	.global __movstrSI52
101	.set	__movstrSI52, __movmemSI52
	/* __movmem also enters at __movmemSI52+2, having already done the
	   load below in a branch delay slot. */
102__movmemSI52:
103	mov.l	@(48,r5),r0
104	mov.l	r0,@(48,r4)
105	.global	__movmemSI48
106	.global	__movstrSI48
107	.set	__movstrSI48, __movmemSI48
108__movmemSI48:
109	mov.l	@(44,r5),r0
110	mov.l	r0,@(44,r4)
111	.global	__movmemSI44
112	.global	__movstrSI44
113	.set	__movstrSI44, __movmemSI44
114__movmemSI44:
115	mov.l	@(40,r5),r0
116	mov.l	r0,@(40,r4)
117	.global	__movmemSI40
118	.global __movstrSI40
119	.set	__movstrSI40, __movmemSI40
120__movmemSI40:
121	mov.l	@(36,r5),r0
122	mov.l	r0,@(36,r4)
123	.global	__movmemSI36
124	.global	__movstrSI36
125	.set	__movstrSI36, __movmemSI36
126__movmemSI36:
127	mov.l	@(32,r5),r0
128	mov.l	r0,@(32,r4)
129	.global	__movmemSI32
130	.global	__movstrSI32
131	.set	__movstrSI32, __movmemSI32
132__movmemSI32:
133	mov.l	@(28,r5),r0
134	mov.l	r0,@(28,r4)
135	.global	__movmemSI28
136	.global	__movstrSI28
137	.set	__movstrSI28, __movmemSI28
138__movmemSI28:
139	mov.l	@(24,r5),r0
140	mov.l	r0,@(24,r4)
141	.global	__movmemSI24
142	.global	__movstrSI24
143	.set	__movstrSI24, __movmemSI24
144__movmemSI24:
145	mov.l	@(20,r5),r0
146	mov.l	r0,@(20,r4)
147	.global	__movmemSI20
148	.global	__movstrSI20
149	.set	__movstrSI20, __movmemSI20
150__movmemSI20:
151	mov.l	@(16,r5),r0
152	mov.l	r0,@(16,r4)
153	.global	__movmemSI16
154	.global	__movstrSI16
155	.set	__movstrSI16, __movmemSI16
156__movmemSI16:
157	mov.l	@(12,r5),r0
158	mov.l	r0,@(12,r4)
159	.global	__movmemSI12
160	.global	__movstrSI12
161	.set	__movstrSI12, __movmemSI12
162__movmemSI12:
163	mov.l	@(8,r5),r0
164	mov.l	r0,@(8,r4)
165	.global	__movmemSI8
166	.global	__movstrSI8
167	.set	__movstrSI8, __movmemSI8
168__movmemSI8:
169	mov.l	@(4,r5),r0
170	mov.l	r0,@(4,r4)
171	.global	__movmemSI4
172	.global	__movstrSI4
173	.set	__movstrSI4, __movmemSI4
174__movmemSI4:
175	mov.l	@(0,r5),r0
	/* Return to whatever PR holds: the original caller, or movmem_loop
	   when __movmem entered the ladder via bsr. */
176	rts
	/* rts delay slot: store of the last longword (offset 0). */
177	mov.l	r0,@(0,r4)
178
179	.global	__movmem_i4_even
180	.global	__movstr_i4_even
181	.set	__movstr_i4_even, __movmem_i4_even
182
183	.global	__movmem_i4_odd
184	.global	__movstr_i4_odd
185	.set	__movstr_i4_odd, __movmem_i4_odd
186
187	.global	__movmemSI12_i4
188	.global	__movstrSI12_i4
189	.set	__movstrSI12_i4, __movmemSI12_i4
190
/*
 * __movmem_i4_even / __movmem_i4_odd (aliases __movstr_i4_even/_odd):
 * longword copy loop using post-increment loads and four data registers.
 *
 * In:    r4 = destination, r5 = source, r6 = loop count (see below).
 * Clobb: r0-r3, r4, r5, r6, T bit.
 *
 * r6 is decremented (dt) once per two longwords stored, and the routine
 * returns when it reaches zero.  Tracing the code gives:
 *   __movmem_i4_even copies 2*r6 + 2 longwords,
 *   __movmem_i4_odd  copies 2*r6 + 3 longwords.
 * r6 must be at least 1 on entry: dt decrements before testing, so a
 * count of 0 would wrap instead of terminating.
 * NOTE(review): how callers derive r6 is not visible in this file.
 *
 * The loop body is split into two halves (L_movmem_loop and
 * L_movmem_start_even), each of which may be the last; r4 is advanced by
 * 16 only between the first half and the second, which is why the first
 * half's exit (L_movmem_2mod4_end) stores at offsets 16 and 20.
 */
191	.p2align	5
	/* Exit taken from L_movmem_loop: r0/r1 hold the last two longwords and
	   r4 has not been advanced for them yet. */
192L_movmem_2mod4_end:
193	mov.l	r0,@(16,r4)
194	rts
	/* rts delay slot: final store. */
195	mov.l	r1,@(20,r4)
196
197	.p2align	2
198
199__movmem_i4_even:
	/* Preload the first two longwords, then join the loop at its second
	   half; the second load runs in the bra delay slot. */
200	mov.l	@r5+,r0
201	bra	L_movmem_start_even
202	mov.l	@r5+,r1
203
204__movmem_i4_odd:
205	mov.l	@r5+,r1
	/* Bias r4 by -4 so the first three longwords, stored at offsets 4, 8
	   and 12, land at the original destination +0, +4 and +8. */
206	add	#-4,r4
207	mov.l	@r5+,r2
208	mov.l	@r5+,r3
209	mov.l	r1,@(4,r4)
210	mov.l	r2,@(8,r4)
211
	/* First half: store the pending r3, fetch the next pair into r0/r1. */
212L_movmem_loop:
213	mov.l	r3,@(12,r4)
	/* r6 -= 1; T = (r6 == 0) */
214	dt	r6
215	mov.l	@r5+,r0
	/* Count exhausted: finish with r0/r1 at L_movmem_2mod4_end.  The load
	   of r1 below is this branch's delay slot and runs either way. */
216	bt/s	L_movmem_2mod4_end
217	mov.l	@r5+,r1
218	add	#16,r4
	/* Second half: r0/r1 are already loaded; fetch r2/r3 and store the
	   group of four at offsets 0..12. */
219L_movmem_start_even:
220	mov.l	@r5+,r2
221	mov.l	@r5+,r3
222	mov.l	r0,@r4
	/* r6 -= 1; T = (r6 == 0) */
223	dt	r6
224	mov.l	r1,@(4,r4)
	/* Count not yet zero: loop.  The store of r2 below is the delay slot
	   and runs whether or not the branch is taken; the store of r3 is
	   done at L_movmem_loop or in the rts delay slot. */
225	bf/s	L_movmem_loop
226	mov.l	r2,@(8,r4)
227	rts
	/* rts delay slot: final store. */
228	mov.l	r3,@(12,r4)
229
230	.p2align	4
/*
 * __movmemSI12_i4 (alias __movstrSI12_i4): copy exactly 12 bytes (three
 * longwords) from @r5 to @r4.
 *
 * In:    r4 = destination, r5 = source; neither is modified.
 * Clobb: r0, r1, r2.
 *
 * All three loads are issued before the first store; the last store sits
 * in the rts delay slot.
 */
231__movmemSI12_i4:
232	mov.l	@r5,r0
233	mov.l	@(4,r5),r1
234	mov.l	@(8,r5),r2
235	mov.l	r0,@r4
236	mov.l	r1,@(4,r4)
237	rts
	/* rts delay slot: final store. */
238	mov.l	r2,@(8,r4)
239