xref: /linux/arch/alpha/lib/memmove.S (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * arch/alpha/lib/memmove.S
4 *
5 * Barely optimized memmove routine for Alpha EV5.
6 *
7 * This is hand-massaged output from the original memcpy.c.  We defer to
8 * memcpy whenever possible; the backwards copy loops are not unrolled.
9 */
10#include <asm/export.h>
/*
 * Assembler setup for the hand-scheduled code below.
 *   .set noat      - the assembler may not use $at ($28) as a temporary
 *                    when expanding macro instructions.
 *   .set noreorder - ask the assembler to emit instructions in exactly the
 *                    order written.
 *   .text          - everything that follows goes into the code section.
 */
11	.set noat
12	.set noreorder
13	.text
14
/*
 * memmove - copy $18 bytes from $17 to $16; the regions may overlap.
 *
 * Register roles, as used by the code below:
 *   $16	dest (never written)
 *   $17	src  (never written)
 *   $18	n, counted down as bytes are copied
 *   $0		copy of dest, set once before any branch and left untouched,
 *		so it is what the caller sees on return
 *   $4, $5	dest / src cursors
 *   $1, $2, $3, $6  scratch
 *   $26	return address
 *   $27, $29	used only by the ldgp at entry
 *
 * Strategy:
 *   1. If dest + n <= src or src + n <= dest the regions do not overlap and
 *      control is handed to memcpy with $16/$17/$18/$26 still intact.
 *   2. Otherwise copy ascending when dest <= src ($memmove_up) and
 *      descending when dest > src, so that no source byte is overwritten
 *      before it has been read.
 *   3. In either direction, if (dest ^ src) & 7 == 0 the pointers are
 *      co-aligned: copy single bytes until the dest cursor is 8-byte
 *      aligned, then whole quadwords, then the 0..7 leftover bytes.
 *      If they are not co-aligned, copy byte by byte.
 *
 * A single byte is moved with ldq_u / extbl / insbl / mskbl / stq_u, i.e. a
 * read-modify-write of the aligned quadword that contains it.
 *
 * The nop / unop / fnop instructions carry no data; presumably they pad the
 * instruction stream so the four-instruction groups (separated by blank
 * lines) stay aligned for EV5 issue.  NOTE(review): confirm before
 * removing or moving any of them.
 */
15	.align 4
16	.globl memmove
17	.ent memmove
18memmove:
19	ldgp $29, 0($27)		/* load gp ($29), addressed via $27 */
20	unop
21	nop
22	.prologue 1
23
/* $4 = dest + n and $5 = src + n: one past the end of each region. */
24	addq $16,$18,$4
25	addq $17,$18,$5
26	cmpule $4,$17,$1		/*  dest + n <= src  */
27	cmpule $5,$16,$2		/*  dest >= src + n  */
28
/*
 * Either test true means no overlap: let memcpy do the work.  This is a
 * branch, not a call, so memcpy returns straight to our caller via $26.
 * !samegp tells the assembler/linker that memcpy uses the same gp, so the
 * branch may skip memcpy's own gp load.
 */
29	bis $1,$2,$1
30	mov $16,$0			/* $0 = dest */
31	xor $16,$17,$2			/* low 3 bits: do dest and src differ? */
32	bne $1,memcpy			!samegp
33
34	and $2,7,$2			/* Test for src/dest co-alignment.  */
35	and $16,7,$1			/* $1 = dest & 7, used by the ascending path */
36	cmpule $16,$17,$3		/* $3 = (dest <= src) */
37	bne $3,$memmove_up		/* dest <= src: copy ascending */
38
/* ---- Descending copy (dest > src); cursors start one past the end. ---- */
39	and $4,7,$1			/* $1 = (dest + n) & 7 */
40	bne $2,$misaligned_dn		/* not co-aligned: bytes only */
41	unop
42	beq $1,$skip_aligned_byte_loop_head_dn	/* end already 8-byte aligned */
43
/*
 * Copy single bytes from the top until the dest cursor ($4) is 8-byte
 * aligned.  The cursors are decremented first, so inside the loop they
 * address the byte being moved.
 */
44$aligned_byte_loop_head_dn:
45	lda $4,-1($4)
46	lda $5,-1($5)
47	unop
48	ble $18,$egress			/* nothing left to copy */
49
50	ldq_u $3,0($5)			/* aligned quadword holding the src byte */
51	ldq_u $2,0($4)			/* aligned quadword holding the dest byte */
52	lda $18,-1($18)			/* n-- */
53	extbl $3,$5,$1			/* $1 = the src byte, in bits 7:0 */
54
55	insbl $1,$4,$1			/* move it to dest's byte lane */
56	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
57	bis $1,$2,$1			/* merge */
58	and $4,7,$6			/* $6 != 0 while dest cursor is unaligned */
59
60	stq_u $1,0($4)			/* write the merged quadword back */
61	bne $6,$aligned_byte_loop_head_dn
62
/*
 * Both cursors are now 8-byte aligned (co-aligned and $4 & 7 == 0).
 * $18 is biased by -8 so that its sign tells whether a whole quadword
 * is still available.
 */
63$skip_aligned_byte_loop_head_dn:
64	lda $18,-8($18)
65	blt $18,$skip_aligned_word_loop_dn	/* fewer than 8 bytes left */
66
/* Copy one quadword per pass, stepping both cursors down by 8. */
67$aligned_word_loop_dn:
68	ldq $1,-8($5)
69	nop
70	lda $5,-8($5)
71	lda $18,-8($18)
72
73	stq $1,-8($4)
74	nop
75	lda $4,-8($4)
76	bge $18,$aligned_word_loop_dn
77
78$skip_aligned_word_loop_dn:
79	lda $18,8($18)			/* undo the bias: $18 = leftover bytes */
80	bgt $18,$byte_loop_tail_dn	/* finish the remainder byte by byte */
81	unop
82	ret $31,($26),1			/* nothing left: return to caller */
83
84	.align 4
/* Descending copy when src and dest are not co-aligned: bytes only. */
85$misaligned_dn:
86	nop
87	fnop
88	unop
89	beq $18,$egress			/* n == 0 */
90
/*
 * Descending byte loop.  The loads address cursor - 1 and the cursors are
 * decremented afterwards, so extbl/insbl/mskbl/stq_u see the updated values.
 */
91$byte_loop_tail_dn:
92	ldq_u $3,-1($5)			/* quadword holding the next src byte */
93	ldq_u $2,-1($4)			/* quadword holding the next dest byte */
94	lda $5,-1($5)
95	lda $4,-1($4)
96
97	lda $18,-1($18)			/* n-- */
98	extbl $3,$5,$1			/* pick the src byte */
99	insbl $1,$4,$1			/* move it to dest's byte lane */
100	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
101
102	bis $1,$2,$1			/* merge */
103	stq_u $1,0($4)
104	bgt $18,$byte_loop_tail_dn
105	br $egress			/* skip over the ascending code */
106
/* ---- Ascending copy (dest <= src); cursors start at the first byte. ---- */
107$memmove_up:
108	mov $16,$4			/* $4 = dest cursor */
109	mov $17,$5			/* $5 = src cursor */
110	bne $2,$misaligned_up		/* not co-aligned: bytes only */
111	beq $1,$skip_aligned_byte_loop_head_up	/* dest already 8-byte aligned */
112
/*
 * Copy single bytes from the bottom until the dest cursor ($4) is 8-byte
 * aligned.  Cursors are advanced after each byte.
 */
113$aligned_byte_loop_head_up:
114	unop
115	ble $18,$egress			/* nothing left to copy */
116	ldq_u $3,0($5)			/* aligned quadword holding the src byte */
117	ldq_u $2,0($4)			/* aligned quadword holding the dest byte */
118
119	lda $18,-1($18)			/* n-- */
120	extbl $3,$5,$1			/* pick the src byte */
121	insbl $1,$4,$1			/* move it to dest's byte lane */
122	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
123
124	bis $1,$2,$1			/* merge */
125	lda $5,1($5)
126	stq_u $1,0($4)			/* store uses the not-yet-advanced $4 */
127	lda $4,1($4)
128
129	and $4,7,$6			/* $6 != 0 while dest cursor is unaligned */
130	bne $6,$aligned_byte_loop_head_up
131
/*
 * Both cursors are now 8-byte aligned.  As in the descending path, $18 is
 * biased by -8 so its sign tells whether a whole quadword remains.
 */
132$skip_aligned_byte_loop_head_up:
133	lda $18,-8($18)
134	blt $18,$skip_aligned_word_loop_up	/* fewer than 8 bytes left */
135
/* Copy one quadword per pass, stepping both cursors up by 8. */
136$aligned_word_loop_up:
137	ldq $1,0($5)
138	nop
139	lda $5,8($5)
140	lda $18,-8($18)
141
142	stq $1,0($4)
143	nop
144	lda $4,8($4)
145	bge $18,$aligned_word_loop_up
146
147$skip_aligned_word_loop_up:
148	lda $18,8($18)			/* undo the bias: $18 = leftover bytes */
149	bgt $18,$byte_loop_tail_up	/* finish the remainder byte by byte */
150	unop
151	ret $31,($26),1			/* nothing left: return to caller */
152
153	.align 4
/* Ascending copy when src and dest are not co-aligned: bytes only. */
154$misaligned_up:
155	nop
156	fnop
157	unop
158	beq $18,$egress			/* n == 0 */
159
/* Ascending byte loop; falls through into $egress when n reaches 0. */
160$byte_loop_tail_up:
161	ldq_u $3,0($5)			/* quadword holding the next src byte */
162	ldq_u $2,0($4)			/* quadword holding the next dest byte */
163	lda $18,-1($18)			/* n-- */
164	extbl $3,$5,$1			/* pick the src byte */
165
166	insbl $1,$4,$1			/* move it to dest's byte lane */
167	mskbl $2,$4,$2			/* clear that lane in the dest quadword */
168	bis $1,$2,$1			/* merge */
169	stq_u $1,0($4)
170
171	lda $5,1($5)
172	lda $4,1($4)
173	nop
174	bgt $18,$byte_loop_tail_up
175
/* Shared exit: return to the address in $26; $0 still holds dest. */
176$egress:
177	ret $31,($26),1
178	nop
179	nop
180	nop
181
182	.end memmove
/*
 * EXPORT_SYMBOL is not defined in this file; presumably it comes from
 * <asm/export.h> and exports memmove to modules - confirm.
 */
183	EXPORT_SYMBOL(memmove)
184