xref: /linux/arch/sh/lib/udivsi3_i4i-Os.S (revision a8fe58cec351c25e09c393bf46117c0c47b5a17c)
1/* Copyright (C) 2006 Free Software Foundation, Inc.
2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
5Free Software Foundation; either version 2, or (at your option) any
6later version.
7
8In addition to the permissions in the GNU General Public License, the
9Free Software Foundation gives you unlimited permission to link the
10compiled version of this file into combinations with other programs,
11and to distribute those combinations without any restriction coming
12from the use of this file.  (The General Public License restrictions
13do apply in other respects; for example, they cover modification of
14the file, and distribution when not linked into a combine
15executable.)
16
17This file is distributed in the hope that it will be useful, but
18WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20General Public License for more details.
21
22You should have received a copy of the GNU General Public License
23along with this program; see the file COPYING.  If not, write to
24the Free Software Foundation, 51 Franklin Street, Fifth Floor,
25Boston, MA 02110-1301, USA.  */
26
27/* Moderately Space-optimized libgcc routines for the Renesas SH /
28   STMicroelectronics ST40 CPUs.
29   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
30
31/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
32   sh4-200 run times:
33   udiv small divisor: 55 cycles
34   udiv large divisor: 52 cycles
35   sdiv small divisor, positive result: 59 cycles
36   sdiv large divisor, positive result: 56 cycles
37   sdiv small divisor, negative result: 65 cycles (*)
38   sdiv large divisor, negative result: 62 cycles (*)
39   (*): r2 is restored in the delay slot of the returning jmp and has a
40        lingering latency of two more cycles.  */
/*
 * __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
 *
 * In:   r4 = dividend, r5 = divisor
 * Out:  r0 = quotient
 *
 * r4 and r5 are pushed on entry and popped again before returning, so the
 * caller gets them back unchanged.  r0, r1, T and the M/Q division flags
 * are overwritten.  PR is overwritten by the internal bsr calls, which is
 * why the return address is copied to r1 first and the routine leaves
 * with a jmp through r1 instead of rts.
 *
 * The divisor is classified by whether it fits in 16 bits:
 *   - it does not: branch to large_divisor (16 division steps)
 *   - it does:     fall through to sdiv_small_divisor, which runs two
 *                  rounds of 16 division steps, one per dividend half
 * sdiv_small_divisor is also entered from __sdivsi3_i4i.
 */
41	.balign 4
42	.global	__udivsi3_i4i
43	.global	__udivsi3_i4
44	.set	__udivsi3_i4, __udivsi3_i4i	/* second name for the same entry point */
45	.type	__udivsi3_i4i, @function
46	.type	__sdivsi3_i4i, @function
47__udivsi3_i4i:
48	sts pr,r1		/* r1 = return address, bsr below overwrites PR */
49	mov.l r4,@-r15		/* push dividend */
50	extu.w r5,r0		/* r0 = low 16 bits of divisor, zero-extended */
51	cmp/eq r5,r0		/* T = 1 iff the divisor fits in 16 bits */
52	swap.w r4,r0		/* r0 = dividend with its 16-bit halves exchanged */
53	shlr16 r4		/* r4 = upper half of dividend */
54	bf/s large_divisor	/* T = 0: divisor needs more than 16 bits */
55	div0u			/* delay slot, runs on both paths: clear M, Q and T */
56	mov.l r5,@-r15		/* push divisor */
57	shll16 r5		/* divisor into the upper half for the div1 steps */
58sdiv_small_divisor:
/* Round 1: 16 div1 steps, 2 x (1 + 1 + 6), on the upper dividend half. */
59	div1 r5,r4
60	bsr div6
61	div1 r5,r4		/* delay slot */
62	div1 r5,r4
63	bsr div6
64	div1 r5,r4		/* delay slot */
/* Regroup: r0 keeps the round-1 quotient bits, r4 gets remainder and low half. */
65	xtrct r4,r0		/* r0 = (low half of r4 << 16) | upper half of r0 */
66	xtrct r0,r4		/* r4 = (low half of r0 << 16) | upper half of r4 */
67	bsr div7
68	swap.w r4,r4		/* delay slot: exchange the halves of r4 */
/* Round 2: 16 div1 steps in total, 7 + 1 + 1 + 7. */
69	div1 r5,r4
70	bsr div7
71	div1 r5,r4		/* delay slot */
72	xtrct r4,r0		/* r0 = (round-2 bits << 16) | round-1 bits */
73	mov.l @r15+,r5		/* pop divisor */
74	swap.w r0,r0		/* round-1 bits back into the upper half */
75	mov.l @r15+,r4		/* pop dividend */
76	jmp @r1			/* leave through the address held in r1 */
77	rotcl r0		/* delay slot: shift the last quotient bit from T into r0 */
/*
 * div7 / div6: run seven (div7) or six (div6) div1 steps on r4 with the
 * divisor in r5, then return with rts.  div7 is one extra step that falls
 * through into div6.  Both are reached with bsr from sdiv_small_divisor.
 */
78div7:
79	div1 r5,r4
80div6:
/* Three steps on the first line, two on the second, the sixth in the rts delay slot. */
81	            div1 r5,r4; div1 r5,r4; div1 r5,r4
82	div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4
83
/*
 * divx3: three rotcl/div1 pairs, then return with rts.
 * Each rotcl r0 moves the top bit of r0 into T and the previous T into
 * bit 0 of r0.  The div1 that follows shifts that T into r4 and leaves its
 * own result bit in T for the next rotcl.
 * Reached with bsr from the .rept block in sdiv_large_divisor.
 */
84divx3:
85	rotcl r0
86	div1 r5,r4
87	rotcl r0
88	div1 r5,r4
89	rotcl r0
90	rts
91	div1 r5,r4		/* delay slot: third div1 of this call */
92
/*
 * large_divisor: unsigned path for a divisor that does not fit in 16 bits.
 * sdiv_large_divisor: entry used by __sdivsi3_i4i, which has already
 * pushed r5 itself.
 *
 * On entry r4 = upper half of the dividend, r0 = dividend with its halves
 * exchanged, and M = Q = T = 0 because div0u ran in the delay slot of the
 * branch that got here.  The loop body is expanded four times and each
 * copy performs 1 + 3 rotcl/div1 pairs, 16 pairs in total.  r5 and r4 are
 * then popped and control leaves through r1.
 */
93large_divisor:
94	mov.l r5,@-r15		/* push divisor */
95sdiv_large_divisor:
96	xor r4,r0		/* clear the low half of r0, low dividend half stays in bits 31..16 */
97	.rept 4
98	rotcl r0		/* next dividend bit into T, previous result bit into r0 */
99	bsr divx3
100	div1 r5,r4		/* delay slot: first div1 of this group of four */
101	.endr
102	mov.l @r15+,r5		/* pop divisor */
103	mov.l @r15+,r4		/* pop dividend */
104	jmp @r1			/* leave through the address held in r1 */
105	rotcl r0		/* delay slot: shift the last quotient bit from T into r0 */
106
/*
 * __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): signed 32-bit division.
 *
 * In:   r4 = dividend, r5 = divisor
 * Out:  r0 = quotient
 *
 * Both operands are pushed, negated with neg where they are negative, and
 * handed to the unsigned code at sdiv_small_divisor or sdiv_large_divisor.
 * That code pops r5 and r4 and leaves with a jmp through r1, so r1 selects
 * the tail:
 *   operand signs equal:  r1 = caller return address copied from PR, the
 *                         unsigned code returns straight to the caller
 *   operand signs differ: r1 = address of negate_result, which negates r0
 *                         and then returns.  The return address is held
 *                         in r2 meanwhile and the caller value of r2 is
 *                         parked in MACL, so MACL is overwritten on this
 *                         path.
 * None of the instructions between the cmp/eq and sdiv_check_divisor
 * change T, so T still says whether the divisor fits in 16 bits there.
 */
107	.global	__sdivsi3_i4i
108	.global __sdivsi3_i4
109	.global __sdivsi3
110	.set	__sdivsi3_i4, __sdivsi3_i4i
111	.set	__sdivsi3, __sdivsi3_i4i
112__sdivsi3_i4i:
113	mov.l r4,@-r15		/* push dividend */
114	cmp/pz r5		/* T = 1 iff divisor >= 0 */
115	mov.l r5,@-r15		/* push divisor */
116	bt/s pos_divisor
117	cmp/pz r4		/* delay slot: T = 1 iff dividend >= 0 */
118	neg r5,r5		/* divisor < 0: negate it */
119	extu.w r5,r0		/* r0 = low 16 bits of the negated divisor */
120	bt/s neg_result		/* dividend >= 0 and divisor < 0 */
121	cmp/eq r5,r0		/* delay slot: T = 1 iff the divisor fits in 16 bits */
122	neg r4,r4		/* both negative: negate the dividend as well */
123pos_result:
124	swap.w r4,r0		/* r0 = dividend with its 16-bit halves exchanged */
125	bra sdiv_check_divisor
126	sts pr,r1		/* delay slot: r1 = caller return address */
127pos_divisor:
128	extu.w r5,r0		/* r0 = low 16 bits of divisor */
129	bt/s pos_result		/* dividend >= 0 and divisor >= 0 */
130	cmp/eq r5,r0		/* delay slot: T = 1 iff the divisor fits in 16 bits */
131	neg r4,r4		/* dividend < 0 and divisor >= 0: negate the dividend */
132neg_result:
133	mova negate_result,r0	/* r0 = address of negate_result */
134	;			/* empty statement, purpose not evident from this file */
135	mov r0,r1		/* the unsigned code will leave through negate_result */
136	swap.w r4,r0		/* r0 = dividend with its 16-bit halves exchanged */
137	lds r2,macl		/* park the caller value of r2 in MACL */
138	sts pr,r2		/* r2 = caller return address */
139sdiv_check_divisor:
140	shlr16 r4		/* r4 = upper half of dividend */
141	bf/s sdiv_large_divisor	/* T = 0: divisor needs more than 16 bits */
142	div0u			/* delay slot, runs on both paths: clear M, Q and T */
143	bra sdiv_small_divisor
144	shll16 r5		/* delay slot: divisor into the upper half */
145	.balign 4		/* NOTE(review): presumably because mova needs a 4-byte aligned target, confirm */
146negate_result:
147	neg r0,r0		/* quotient = -quotient */
148	jmp @r2			/* return to the caller */
149	sts macl,r2		/* delay slot: restore the caller value of r2 */
150