/* xref: /linux/arch/sh/lib/udivsi3_i4i.S (revision 6ee738610f41b59733f63718f0bdbcba7d3a3f12) */
/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   2004, 2005, 2006
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

!! libgcc routines for the Renesas / SuperH SH CPUs.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com

/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */

/* This code uses shld, thus is not suitable for SH1 / SH2.  */

/* Signed / unsigned division without use of FPU, optimized for SH4.
   Uses a lookup table for divisors in the range -128 .. +128, and
   div1 with case distinction for larger divisors in three more ranges.
   The code is lumped together with the table to allow the use of mova.  */
/* Byte offsets inside a 32-bit word in memory, chosen by endianness:
     L_LSB    - the least significant byte,
     L_LSWMSB - the upper byte of the least significant 16-bit half,
     L_MSWLSB - the lower byte of the most significant 16-bit half.
   The division code uses L_LSWMSB and L_MSWLSB as mov.b displacements
   to park quotient bytes in a scratch word on the stack.  L_LSB is not
   referenced by the code visible in this file.  */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define L_LSB 0
#define L_LSWMSB 1
#define L_MSWLSB 2
#else
#define L_LSB 3
#define L_LSWMSB 2
#define L_MSWLSB 1
#endif

/* __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division without FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   Every path pushes r4 and r5 and pops them again before rts; r1 and
   the T/M/Q flags are scratch.

   The entry code only classifies the divisor:
     divisor <= 128      -> udiv_le128 (reciprocal tables),
     divisor >= 0x10000  -> udiv_ge64k,
     otherwise           -> div1 stepping, continued at udiv_25.  */
	.balign 4
	.global	__udivsi3_i4i
	.global	__udivsi3_i4
	.set	__udivsi3_i4, __udivsi3_i4i
	.type	__udivsi3_i4i, @function
__udivsi3_i4i:
	mov.w c128_w, r1	/* r1 = 128 */
	div0u			/* clear M, Q and T for unsigned div1 steps */
	mov r4,r0
	shlr8 r0		/* r0 = dividend >> 8 */
	cmp/hi r1,r5		/* T = (divisor > 128), unsigned */
	extu.w r5,r1		/* r1 = divisor & 0xffff */
	bf udiv_le128
	cmp/eq r5,r1		/* T = divisor fits in 16 bits */
	bf udiv_ge64k
	/* 16-bit divisor above 128.  */
	shlr r0			/* r0 = dividend >> 9, T = dividend bit 8 */
	mov r5,r1		/* keep the unshifted divisor for saving */
	shll16 r5		/* divisor moved to the upper half for div1 */
	mov.l r4,@-r15		/* save r4 */
	div1 r5,r0
	mov.l r1,@-r15		/* save the original r5 */
	div1 r5,r0
	div1 r5,r0
	bra udiv_25
	div1 r5,r0		/* delay slot: fourth div1 step */

/* Division by a divisor in 0 .. 128 using the reciprocal tables.
   div_le128  - entry from the signed routine (pos_result): r4/r5 are
                already saved on the stack and hold the magnitudes.
   udiv_le128 - entry from the unsigned routine: saves r4/r5 itself.
   Both join at div_le128_2 with r1 = div_table_ix[divisor].

   quotient = ((MACH + dividend) carried into 33 bits) >> 1, then shifted
   by div_table_clz[divisor], where MACH is the high word of
   inverse * dividend; adding the dividend accounts for the implicit
   leading 1 of the normalized inverse.
   A divisor of 1 returns the dividend via div_by_1.  A divisor of 0
   picks the -6 table offset, so the mov.l below is misaligned (see the
   comment on div_table_ix).  */
div_le128:
	mova div_table_ix,r0
	bra div_le128_2
	mov.b @(r0,r5),r1	/* delay slot: r1 = div_table_ix[divisor] */
udiv_le128:
	mov.l r4,@-r15		/* save r4 */
	mova div_table_ix,r0
	mov.b @(r0,r5),r1	/* r1 = div_table_ix[divisor] */
	mov.l r5,@-r15		/* save r5 */
div_le128_2:
	mova div_table_inv,r0
	mov.l @(r0,r1),r1	/* r1 = normalized inverse of the divisor */
	mov r5,r0
	tst #0xfe,r0		/* T = ((divisor & 0xfe) == 0) */
	mova div_table_clz,r0
	dmulu.l r1,r4		/* MACH:MACL = inverse * dividend */
	mov.b @(r0,r5),r1	/* r1 = shift count for this divisor */
	bt/s div_by_1
	mov r4,r0		/* delay slot: r0 = dividend */
	mov.l @r15+,r5		/* restore r5 */
	sts mach,r0
	/* clrt */		/* not needed: T is 0 when bt/s falls through */
	addc r4,r0		/* r0 = MACH + dividend, carry out in T */
	mov.l @r15+,r4		/* restore r4 */
	rotcr r0		/* shift the 33-bit sum right by one */
	rts
	shld r1,r0		/* delay slot: apply the table shift count */

/* Common exits for the table paths when (divisor & 0xfe) == 0.
   div_by_1_neg - r4 holds the dividend magnitude and the result must be
                  negative: r0 = -r4, then fall into div_by_1.
   div_by_1     - r0 already holds the result; pop the saved r5 and r4
                  and return.  */
div_by_1_neg:
	neg r4,r0
div_by_1:
	mov.l @r15+,r5		/* restore r5 */
	rts
	mov.l @r15+,r4		/* delay slot: restore r4 */

/* Divisors that do not fit in 16 bits (>= 0x10000).
   On entry r0 = dividend >> 8 and M/Q are cleared.
   div_ge64k  - entry from the signed routine: r4/r5 already saved, T was
                set by cmp/hi r0,r5 in the delay slot of the caller.
   udiv_ge64k - entry from the unsigned routine: saves r4/r5 itself.
   If divisor > (dividend >> 8) the quotient fits in 8 bits and the
   div_r8 / udiv_r8 path is used instead.  Otherwise eight div1 steps
   are run here, the low quotient byte of r0 is parked in a zeroed
   scratch word on the stack, the low dividend byte is merged into r0,
   and the remaining eight steps plus the return sequence are shared
   with div_ge64k_end.  */
div_ge64k:
	bt/s div_r8
	div0u			/* delay slot: runs on both outcomes */
	shll8 r5		/* divisor << 8 for div1 */
	bra div_ge64k_2
	div1 r5,r0		/* delay slot: first div1 step */
udiv_ge64k:
	cmp/hi r0,r5		/* T = (divisor > dividend >> 8) */
	mov r5,r1		/* keep the unshifted divisor for saving */
	bt udiv_r8
	shll8 r5		/* divisor << 8 for div1 */
	mov.l r4,@-r15		/* save r4 */
	div1 r5,r0		/* first div1 step */
	mov.l r1,@-r15		/* save the original r5 */
div_ge64k_2:
	div1 r5,r0
	mov.l zero_l,r1		/* r1 = 0 */
	.rept 4
	div1 r5,r0
	.endr
	mov.l r1,@-r15		/* push a zeroed scratch word */
	div1 r5,r0
	mov.w m256_w,r1		/* r1 = 0xffffff00 (mov.w sign-extends) */
	div1 r5,r0		/* eighth div1 step */
	mov.b r0,@(L_LSWMSB,r15) /* park low byte of r0 in the scratch word */
	xor r4,r0
	and r1,r0
	bra div_ge64k_end
	xor r4,r0		/* delay slot: r0 = (r0 & ~0xff) | (r4 & 0xff) */

/* Quotient known to fit in 8 bits (divisor > dividend >> 8).
   On entry r0 = dividend >> 8.
   div_r8  - entry from the signed routine: r4/r5 already saved; the
             div0u in the delay slot at div_ge64k has cleared T.
   udiv_r8 - entry from the unsigned routine: saves r4/r5 and clears T.
   Both shift the low dividend byte to the top of r4.  From div_r8_2 on,
   r1 carries the running remainder through eight div1 steps, while
   rotcl on r0 feeds the next dividend bit out through T and collects
   the quotient bit produced by the previous step.  r5 is restored
   before the last step, so a copy of the divisor in r4 is used there.  */
div_r8:
	shll16 r4
	bra div_r8_2
	shll8 r4		/* delay slot: r4 = dividend << 24 */
udiv_r8:
	mov.l r4,@-r15		/* save r4 */
	shll16 r4
	clrt
	shll8 r4		/* r4 = dividend << 24 */
	mov.l r5,@-r15		/* save r5 */
div_r8_2:
	rotcl r4
	mov r0,r1		/* r1 = dividend >> 8 */
	div1 r5,r1
	mov r4,r0
	rotcl r0
	mov r5,r4		/* divisor copy for the last step */
	div1 r5,r1
	.rept 5
	rotcl r0; div1 r5,r1
	.endr
	rotcl r0
	mov.l @r15+,r5		/* restore r5 */
	div1 r4,r1		/* eighth div1 step, using the divisor copy */
	mov.l @r15+,r4		/* restore r4 */
	rts
	rotcl r0		/* delay slot: shift in the last quotient bit */

/* __sdivsi3_i4i (aliases __sdivsi3_i4, __sdivsi3): signed 32-bit
   division without FPU.
   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed here (r5 either below or at pos_divisor) and
   popped again by whichever path returns.

   The entry code converts both operands to magnitudes and selects
   pos_result (operand signs equal) or neg_result (signs differ).
   pos_result then classifies the divisor magnitude:
     <= 128      -> div_le128,
     >= 0x10000  -> div_ge64k,
     otherwise   -> div1 stepping below.
   udiv_25 and div_ge64k_end are also entered from the unsigned
   routine.  While stepping, completed quotient bytes of r0 are parked
   in a zeroed scratch word on the stack; the return sequence pops that
   word, ORs it with the low byte of r0 and shifts in the final
   quotient bit from T.  */
	.global	__sdivsi3_i4i
	.global __sdivsi3_i4
	.global	__sdivsi3
	.set	__sdivsi3_i4, __sdivsi3_i4i
	.set	__sdivsi3, __sdivsi3_i4i
	.type	__sdivsi3_i4i, @function
	/* This is link-compatible with a __sdivsi3 call,
	   but we effectively clobber only r1.  */
__sdivsi3_i4i:
	mov.l r4,@-r15		/* save r4 */
	cmp/pz r5		/* T = (divisor >= 0) */
	mov.w c128_w, r1	/* r1 = 128 */
	bt/s pos_divisor
	cmp/pz r4		/* delay slot: T = (dividend >= 0) */
	mov.l r5,@-r15		/* divisor < 0: save r5 ... */
	neg r5,r5		/* ... and take its magnitude */
	bt/s neg_result		/* dividend >= 0, divisor < 0 */
	cmp/hi r1,r5		/* delay slot: T = (|divisor| > 128) */
	neg r4,r4		/* both negative: dividend magnitude */
pos_result:
	extu.w r5,r0		/* r0 = divisor & 0xffff */
	bf div_le128
	cmp/eq r5,r0		/* T = divisor fits in 16 bits */
	mov r4,r0
	shlr8 r0		/* r0 = dividend >> 8 */
	bf/s div_ge64k
	cmp/hi r0,r5		/* delay slot: T = (divisor > dividend >> 8) */
	div0u
	shll16 r5		/* divisor moved to the upper half for div1 */
	div1 r5,r0
	div1 r5,r0
	div1 r5,r0
udiv_25:
	mov.l zero_l,r1		/* r1 = 0 */
	div1 r5,r0
	div1 r5,r0
	mov.l r1,@-r15		/* push a zeroed scratch word */
	.rept 3
	div1 r5,r0
	.endr
	mov.b r0,@(L_MSWLSB,r15) /* park low byte of r0 in the scratch word */
	xtrct r4,r0		/* r0 = (r4 << 16) | (r0 >> 16) */
	swap.w r0,r0
	.rept 8
	div1 r5,r0
	.endr
	mov.b r0,@(L_LSWMSB,r15) /* park the next quotient byte */
div_ge64k_end:
	.rept 8
	div1 r5,r0
	.endr
	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
	extu.b r0,r0		/* keep only the newest quotient byte */
	mov.l @r15+,r5		/* restore r5 */
	or r4,r0		/* merge the bytes parked in the scratch word */
	mov.l @r15+,r4		/* restore r4 */
	rts
	rotcl r0		/* delay slot: shift in the last quotient bit */

/* Table-based division for |divisor| in 0 .. 128 when the result must
   be negated (entered from neg_result).
   On entry r0 = divisor magnitude (zero-extended low half of r5),
   r4 = dividend magnitude, and the original r4/r5 are on the stack.
   Same computation as div_le128_2, with a final neg in the rts delay
   slot.  (divisor & 0xfe) == 0 exits through div_by_1_neg.  */
div_le128_neg:
	tst #0xfe,r0		/* T = ((divisor & 0xfe) == 0) */
	mova div_table_ix,r0
	mov.b @(r0,r5),r1	/* r1 = div_table_ix[divisor] */
	mova div_table_inv,r0
	bt/s div_by_1_neg
	mov.l @(r0,r1),r1	/* delay slot: r1 = normalized inverse */
	mova div_table_clz,r0
	dmulu.l r1,r4		/* MACH:MACL = inverse * dividend */
	mov.b @(r0,r5),r1	/* r1 = shift count for this divisor */
	mov.l @r15+,r5		/* restore r5 */
	sts mach,r0
	/* clrt */		/* not needed: T is 0 when bt/s falls through */
	addc r4,r0		/* r0 = MACH + dividend, carry out in T */
	mov.l @r15+,r4		/* restore r4 */
	rotcr r0		/* shift the 33-bit sum right by one */
	shld r1,r0		/* apply the table shift count */
	rts
	neg r0,r0		/* delay slot: negate the quotient */

/* Signed division, continued.
   pos_divisor - reached from __sdivsi3_i4i when the divisor is >= 0,
                 with T = (dividend >= 0).  Saves r5, then goes to
                 pos_result for a non-negative dividend or takes the
                 dividend magnitude and falls into neg_result.
   neg_result  - operand signs differ.  Mirrors pos_result: classifies
                 the divisor magnitude (<= 128 -> div_le128_neg,
                 >= 0x10000 -> div_ge64k_neg, otherwise div1 stepping
                 here) and parks quotient bytes in a zeroed scratch
                 word on the stack.
   div_ge64k_neg_end - last eight div1 steps; pops the scratch word and
                 r5 and assembles the quotient in r1.
   div_r8_neg_end - pops r4, shifts the last quotient bit into r1 and
                 returns the negated quotient in r0.  */
pos_divisor:
	mov.l r5,@-r15		/* save r5 */
	bt/s pos_result		/* dividend >= 0 as well */
	cmp/hi r1,r5		/* delay slot: T = (divisor > 128) */
	neg r4,r4		/* dividend < 0: take its magnitude */
neg_result:
	extu.w r5,r0		/* r0 = divisor & 0xffff */
	bf div_le128_neg
	cmp/eq r5,r0		/* T = divisor fits in 16 bits */
	mov r4,r0
	shlr8 r0		/* r0 = dividend >> 8 */
	bf/s div_ge64k_neg
	cmp/hi r0,r5		/* delay slot: T = (divisor > dividend >> 8) */
	div0u
	mov.l zero_l,r1		/* r1 = 0 */
	shll16 r5		/* divisor moved to the upper half for div1 */
	div1 r5,r0
	mov.l r1,@-r15		/* push a zeroed scratch word */
	.rept 7
	div1 r5,r0
	.endr
	mov.b r0,@(L_MSWLSB,r15) /* park low byte of r0 in the scratch word */
	xtrct r4,r0		/* r0 = (r4 << 16) | (r0 >> 16) */
	swap.w r0,r0
	.rept 8
	div1 r5,r0
	.endr
	mov.b r0,@(L_LSWMSB,r15) /* park the next quotient byte */
div_ge64k_neg_end:
	.rept 8
	div1 r5,r0
	.endr
	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
	extu.b r0,r1		/* r1 = newest quotient byte */
	mov.l @r15+,r5		/* restore r5 */
	or r4,r1		/* merge the bytes parked in the scratch word */
div_r8_neg_end:
	mov.l @r15+,r4		/* restore r4 */
	rotcl r1		/* shift in the last quotient bit */
	rts
	neg r1,r0		/* delay slot: r0 = -quotient */

/* Negative-result counterpart of div_ge64k (divisor magnitude does not
   fit in 16 bits).  Entered from neg_result with r0 = dividend >> 8,
   r4/r5 already saved, and T = (divisor > dividend >> 8) from the
   caller's delay slot.  T set means the quotient fits in 8 bits and
   div_r8_neg is used.  Otherwise eight div1 steps run here, the low
   byte of r0 is parked in a zeroed scratch word, the low dividend byte
   is merged into r0, and the rest is shared with div_ge64k_neg_end.  */
div_ge64k_neg:
	bt/s div_r8_neg
	div0u			/* delay slot: runs on both outcomes */
	shll8 r5		/* divisor << 8 for div1 */
	mov.l zero_l,r1		/* r1 = 0 */
	.rept 6
	div1 r5,r0
	.endr
	mov.l r1,@-r15		/* push a zeroed scratch word */
	div1 r5,r0
	mov.w m256_w,r1		/* r1 = 0xffffff00 (mov.w sign-extends) */
	div1 r5,r0		/* eighth div1 step */
	mov.b r0,@(L_LSWMSB,r15) /* park low byte of r0 in the scratch word */
	xor r4,r0
	and r1,r0
	bra div_ge64k_neg_end
	xor r4,r0		/* delay slot: r0 = (r0 & ~0xff) | (r4 & 0xff) */

/* 16-bit constant 128, loaded with "mov.w c128_w, r1" by both entry
   points and compared against the divisor to select the table path.
   NOTE(review): presumably kept between the code fragments so that both
   PC-relative mov.w loads can reach it - confirm before moving it.  */
c128_w:
	.word 128

/* Negative-result counterpart of div_r8: the quotient fits in 8 bits.
   Entered from div_ge64k_neg with r0 = dividend >> 8 and r4/r5 already
   saved.  Here r0 carries the running remainder through eight div1
   steps while rotcl on r1 feeds dividend bits out through T and
   collects quotient bits.  r5 is restored before the last step, so the
   divisor copy in r4 is used there; div_r8_neg_end restores r4 and
   negates the result.  */
div_r8_neg:
	clrt
	shll16 r4
	mov r4,r1
	shll8 r1		/* r1 = dividend << 24 */
	mov r5,r4		/* divisor copy for the last step */
	.rept 7
	rotcl r1; div1 r5,r0
	.endr
	mov.l @r15+,r5		/* restore r5 */
	rotcl r1
	bra div_r8_neg_end
	div1 r4,r0		/* delay slot: eighth div1 step */

/* 16-bit constant 0xff00.  It is loaded with mov.w, which sign-extends,
   so the register receives 0xffffff00 (-256); the code uses it as a
   mask that clears the low byte.  */
m256_w:
	.word 0xff00
/* This table has been generated by divtab-sh4.c.  */
/* div_table_clz[d] for d = 0 .. 127: signed shift count that the table
   division paths load with mov.b and apply with shld after the rotcr
   step (negative = shift right).
   Exactly 128 bytes must be emitted here: the entry for d = 128 is the
   first byte of div_table_ix, which follows immediately, and
   div_table_ix must stay 4-byte aligned because it is addressed with
   mova.  */
	.balign 4
div_table_clz:
	.byte	0, 1, 0, -1, -1		/* d = 0 .. 4 */
	/* d = 5 .. 8 */
	.rept 4
	.byte	-2
	.endr
	/* d = 9 .. 16 */
	.rept 8
	.byte	-3
	.endr
	/* d = 17 .. 32 */
	.rept 16
	.byte	-4
	.endr
	/* d = 33 .. 64 */
	.rept 32
	.byte	-5
	.endr
	/* d = 65 .. 127 */
	.rept 63
	.byte	-6
	.endr
/* Lookup table translating positive divisor to index into table of
   normalized inverse.  N.B. the '0' entry is also the last entry of the
   previous table, and causes an unaligned access for division by zero.

   Each entry is a signed byte offset that the code adds to
   div_table_inv for a mov.l load.  Divisors that differ only by a power
   of two share the same offset.  There are 129 entries, d = 0 .. 128.  */
div_table_ix:
	.byte	-6					/* d = 0 */
	.byte	-128					/* d = 1 */
	.byte	-128, 0					/* d = 2 .. 3 */
	.byte	-128, -64, 0, 64			/* d = 4 .. 7 */
	.byte	-128, -96, -64, -32, 0, 32, 64, 96	/* d = 8 .. 15 */
	.byte	-128, -112, -96, -80, -64, -48, -32, -16 /* d = 16 .. 23 */
	.byte	0, 16, 32, 48, 64, 80, 96, 112		/* d = 24 .. 31 */
	.byte	-128, -120, -112, -104, -96, -88, -80, -72 /* d = 32 .. 39 */
	.byte	-64, -56, -48, -40, -32, -24, -16, -8	/* d = 40 .. 47 */
	.byte	0, 8, 16, 24, 32, 40, 48, 56		/* d = 48 .. 55 */
	.byte	64, 72, 80, 88, 96, 104, 112, 120	/* d = 56 .. 63 */
	.byte	-128, -124, -120, -116, -112, -108, -104, -100 /* d = 64 .. 71 */
	.byte	-96, -92, -88, -84, -80, -76, -72, -68	/* d = 72 .. 79 */
	.byte	-64, -60, -56, -52, -48, -44, -40, -36	/* d = 80 .. 87 */
	.byte	-32, -28, -24, -20, -16, -12, -8, -4	/* d = 88 .. 95 */
	.byte	0, 4, 8, 12, 16, 20, 24, 28		/* d = 96 .. 103 */
	.byte	32, 36, 40, 44, 48, 52, 56, 60		/* d = 104 .. 111 */
	.byte	64, 68, 72, 76, 80, 84, 88, 92		/* d = 112 .. 119 */
	.byte	96, 100, 104, 108, 112, 116, 120, 124	/* d = 120 .. 127 */
	.byte	-128					/* d = 128 */
/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
/* 64 longs in total.  zero_l labels the first entry (value 0); the code
   also loads it with "mov.l zero_l,r1" as a plain zero constant.
   div_table_inv labels the 33rd entry, 128 bytes after zero_l, so the
   signed byte offsets from div_table_ix (-128 .. 124) reach all
   entries.  */
	.balign 4
zero_l:
	.long	0x0
	.long	0xF81F81F9
	.long	0xF07C1F08
	.long	0xE9131AC0
	.long	0xE1E1E1E2
	.long	0xDAE6076C
	.long	0xD41D41D5
	.long	0xCD856891
	.long	0xC71C71C8
	.long	0xC0E07039
	.long	0xBACF914D
	.long	0xB4E81B4F
	.long	0xAF286BCB
	.long	0xA98EF607
	.long	0xA41A41A5
	.long	0x9EC8E952
	.long	0x9999999A
	.long	0x948B0FCE
	.long	0x8F9C18FA
	.long	0x8ACB90F7
	.long	0x86186187
	.long	0x81818182
	.long	0x7D05F418
	.long	0x78A4C818
	.long	0x745D1746
	.long	0x702E05C1
	.long	0x6C16C16D
	.long	0x68168169
	.long	0x642C8591
	.long	0x60581606
	.long	0x5C9882BA
	.long	0x58ED2309
div_table_inv:
	.long	0x55555556
	.long	0x51D07EAF
	.long	0x4E5E0A73
	.long	0x4AFD6A06
	.long	0x47AE147B
	.long	0x446F8657
	.long	0x41414142
	.long	0x3E22CBCF
	.long	0x3B13B13C
	.long	0x38138139
	.long	0x3521CFB3
	.long	0x323E34A3
	.long	0x2F684BDB
	.long	0x2C9FB4D9
	.long	0x29E4129F
	.long	0x27350B89
	.long	0x24924925
	.long	0x21FB7813
	.long	0x1F7047DD
	.long	0x1CF06ADB
	.long	0x1A7B9612
	.long	0x18118119
	.long	0x15B1E5F8
	.long	0x135C8114
	.long	0x11111112
	.long	0xECF56BF
	.long	0xC9714FC
	.long	0xA6810A7
	.long	0x8421085
	.long	0x624DD30
	.long	0x4104105
	.long	0x2040811
	/* maximum error: 0.987342 scaled: 0.921875*/