/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b) 				\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EX2(x,y,c,d,e,a,b) 			\
98: 	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EXO2(x,y) 				\
98: 	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4

#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4
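
/* EX(x,y,a,b) wraps one user access: the access gets local label 98, a
 * fixup stub at 99 computes the not-yet-copied count into %g3 (via the
 * "a, b" operand pair) and branches to fixupretl, and an __ex_table entry
 * pairs 98 with 99.  EX2() does the same but executes one extra
 * instruction (c, d, e) before loading %g3.  EXO2() points its __ex_table
 * entry at the shared label 97 in .fixup, which simply returns the current
 * %o2.  EXT() records a whole [start,end) instruction range together with
 * a handler that derives the remaining count from the fault position.
 */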

/* Please do not change following macros unless you change logic used
 * in .fixup at the end of this file as well
 */

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
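
/* For reference: MOVE_BIGCHUNK and MOVE_BIGALIGNCHUNK each copy 32 bytes
 * (the latter needs a doubleword aligned destination so it can use std),
 * MOVE_LASTCHUNK copies 16 bytes working backwards from the end,
 * MOVE_HALFCHUNK copies 8 bytes by halfwords and MOVE_SHORTCHUNK copies
 * 2 bytes.  The .fixup handlers at the bottom depend on these exact
 * instruction counts, hence the warning above.
 */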

	.text
	.align	4

	.globl  __copy_user_begin
__copy_user_begin:

	.globl	__copy_user
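/* dword_align: copy a leading byte and/or halfword so that %o1 (and with
 * it %o0, which has the same misalignment here) becomes word aligned, then
 * rejoin the main path at label 3 below.  Reached from __copy_user only
 * when the copy is longer than 15 bytes.
 */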
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0

	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
4:
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

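/* Entry point.  If src and dst disagree in their low two address bits the
 * loads and stores can never both be word sized, so take cannot_optimize;
 * copies of at most 15 bytes go to short_aligned_end; otherwise make %o1
 * word aligned (dword_align) and fall into the unrolled loops below.
 */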
__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7
	be	3f
	 andcc	%o0, 4, %g0

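/* Both label 5 below and ldd_std start with the same ldd (see the comment
 * above the MOVE_ macros).  When the destination turns out to be
 * doubleword aligned, the delay slot of this branch executes that shared
 * first ldd and we enter ldd_std just past it ("ldd_std + 4"), so the rest
 * of each 128-byte block can be stored with std.
 */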
	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

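/* Dispatch into copy_user_table: %g7 = count & 0x70 (the zero case
 * branched around this above).  Each MOVE_LASTCHUNK is 6 instructions
 * (24 bytes of code) and copies 16 bytes, so jumping %g7 * 3/2 bytes back
 * from copy_user_table_end runs exactly %g7/16 of them, e.g. %g7 = 0x30
 * gives %o4 = 72, three entries, 48 bytes copied.  %o0/%o1 are advanced
 * first because the entries index backwards from the end.
 */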
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	 andcc	%g1, 4, %g0

	EX(ldd	[%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld	[%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st	%g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh	[%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth	%g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

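/* Same tail handling as after the word-store loop above: run the 16-byte
 * pieces through copy_user_table and finish in copy_user_last7.
 */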
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

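/* cannot_optimize: src and dst differ in their low two address bits.  The
 * condition codes still hold the "cmp %o2, 15" from above and %o5 is
 * (dst ^ src) & 3.  Short copies go straight to short_end; if the
 * misalignment is exactly 2 we can still copy halfwords (10: below),
 * otherwise fall back to byte pairs in byte_chunk.
 */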
cannot_optimize:
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3

	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f
	 and	%o2, 0xe, %o3

byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

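/* short_end: only the low four bits of %o2 are still to be copied.  The
 * even part (%o3 = %o2 & 0xe) is done by jumping into the MOVE_SHORTCHUNK
 * table below: each entry is 4 instructions (16 bytes of code) for 2 bytes
 * of data, hence the "sll %o3, 3" scaling; e.g. %o3 = 6 gives %o4 = 48,
 * three entries, 6 bytes copied.  A trailing odd byte is handled after
 * short_table_end.
 */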
short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub	[%o1], %g2, add %g0, 1)
	EX(stb	%g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

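/* short_aligned_end: at most 15 bytes and src/dst mutually word aligned.
 * The condition codes still reflect "andcc %o1, 3" from __copy_user: an
 * unaligned %o1 falls back to short_end, otherwise copy an 8-byte piece
 * here if needed and let copy_user_last7 finish the remaining 4/2/1 bytes.
 */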
short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld	[%o1 + 0x00], %g2)
	EXO2(ld	[%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st	%g2, [%o0 + 0x00])
	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1

	.section .fixup,#alloc,#execinstr
	.align	4
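/* Common fixup exit.  Label 97 is the EXO2() target: the access faulted
 * before %o2 was adjusted, so the whole remaining %o2 is the answer.
 * fixupretl expects the not-copied byte count in %g3 and does, roughly:
 *
 *	if (%o0 >= PAGE_OFFSET && %o1 < PAGE_OFFSET)
 *		__bzero(%o0, %g3);	zero the tail a faulting
 *					copy_from_user left unwritten
 *	return %g3;
 */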
97:
	mov	%o2, %g3
fixupretl:
	sethi   %hi(PAGE_OFFSET), %g1
	cmp	%o0, %g1
	blu	1f
	 cmp	%o1, %g1
	bgeu	1f
	 nop
	save	%sp, -64, %sp
	mov	%i0, %o0
	call	__bzero
	 mov	%g3, %o1
	restore
1:	retl
	 mov	%g3, %o0

/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
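/* One handler per EXT() region: 50 covers the word-store MOVE_BIGCHUNK
 * loop, 51 copy_user_table, 52 the ldd_std loop, 53 the halfword loop at
 * 10:, 54 byte_chunk and 55 the short_table.  Each one reconstructs from
 * the instruction index in %g2 roughly how many bytes its region still
 * owed, puts that (plus whatever the later stages would have copied) into
 * %g3 and adjusts %o0 before falling into fixupretl.
 */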
50:
/* This magic counts how many bytes are left when a fault hits inside
 * MOVE_BIGCHUNK.  It is derived from how much the ldds have read and the
 * sts have stored at that point:
 * x = g2 % 12;
 * g3 = g1 + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
 * o0 += (g2 / 12) * 32;
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 -= (i / 6) * 16 + 16;
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
52:
/* g3 = g1 + g7 - ((g2 / 8) * 32 + ((g2 & 4) ? (g2 & 3) * 8 : 0));
   o0 += (g2 / 8) * 32 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2
53:
/* g3 = o3 + (o2 & 15) - (g2 & 8) - ((g2 & 4) ? (g2 & 3) * 2 : 0);
   o0 += (g2 & 8) */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
54:
/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - ((g2 & 2) ? (g2 & 1) : 0);
   o0 += (g2 / 4) * 2 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
55:
/* i = 27 - g2;
   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
   o0 -= i / 4 * 2 + 1 */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	1, %g2
	add	%o5, %o5, %o5
	be,a	1f
	 clr	%g2
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3

	.globl  __copy_user_end
__copy_user_end: