xref: /linux/arch/sh/lib/udivsi3_i4i.S (revision 03f7c1d2a49acd30e38789cd809d3300721e9b0e)
1/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
2
3   Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4   2004, 2005, 2006
5   Free Software Foundation, Inc.
6*/
7
8!! libgcc routines for the Renesas / SuperH SH CPUs.
9!! Contributed by Steve Chamberlain.
10!! sac@cygnus.com
11
12!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
13!! recoded in assembly by Toshiyasu Morita
14!! tm@netcom.com
15
16/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
17   ELF local label prefixes by J"orn Rennecke
18   amylaar@cygnus.com  */
19
20/* This code uses shld, thus is not suitable for SH1 / SH2.  */
21
22/* Signed / unsigned division without use of FPU, optimized for SH4.
23   Uses a lookup table for divisors in the range -128 .. +128, and
24   div1 with case distinction for larger divisors in three more ranges.
25   The code is lumped together with the table to allow the use of mova.  */
/* Byte offsets, within a 32-bit word stored in memory, of individual bytes */
/* of that word; the values depend on the configured endianness. */
/* L_LSB    : bits 0..7   (not referenced by the code visible in this file). */
/* L_LSWMSB : bits 8..15  (upper byte of the low 16-bit half). */
/* L_MSWLSB : bits 16..23 (lower byte of the high 16-bit half). */
/* They are used as mov.b displacements from r15 to assemble a quotient */
/* byte by byte in a scratch word on the stack. */
26#ifdef CONFIG_CPU_LITTLE_ENDIAN
27#define L_LSB 0
28#define L_LSWMSB 1
29#define L_MSWLSB 2
30#else
31#define L_LSB 3
32#define L_LSWMSB 2
33#define L_MSWLSB 1
34#endif
35
/* __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division. */
/* In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient. */
/* r4 and r5 are pushed on the stack and popped again before returning; */
/* r1 is used as scratch, and the table path also overwrites MACH/MACL. */
/* This entry only classifies the divisor and starts the mid-range case; */
/* the work continues at udiv_le128, udiv_ge64k or udiv_25. */
36	.balign 4
37	.global	__udivsi3_i4i
38	.global	__udivsi3_i4
39	.set	__udivsi3_i4, __udivsi3_i4i
40	.type	__udivsi3_i4i, @function
41__udivsi3_i4i:
	/* r1 = 128, the largest divisor handled by the lookup tables. */
42	mov.w c128_w, r1
	/* Clear Q, M and T for the unsigned div1 steps. */
43	div0u
	/* r0 = dividend shifted right by 8. */
44	mov r4,r0
45	shlr8 r0
	/* T = (divisor is above 128), unsigned compare. */
46	cmp/hi r1,r5
	/* r1 = low 16 bits of the divisor; T is left untouched. */
47	extu.w r5,r1
	/* Divisor of at most 128 (including 0): table-driven path. */
48	bf udiv_le128
	/* T = (divisor equals its own low 16 bits), i.e. it is below 64K. */
49	cmp/eq r5,r1
50	bf udiv_ge64k
	/* Mid-range divisor (above 128, below 64K). */
	/* r0 = dividend shifted right by 9; T = the bit shifted out (bit 8). */
51	shlr r0
	/* Keep the unshifted divisor in r1 so it can be saved on the stack. */
52	mov r5,r1
	/* r5 = divisor shifted left by 16 for the div1 steps. */
53	shll16 r5
	/* Save the caller's r4 (dividend) and r5 (divisor, via r1), */
	/* interleaved with the first div1 steps. */
54	mov.l r4,@-r15
55	div1 r5,r0
56	mov.l r1,@-r15
57	div1 r5,r0
58	div1 r5,r0
	/* Join the sequence shared with the signed path. */
59	bra udiv_25
	/* Delay slot: fourth div1 step; with the 21 steps from udiv_25 on, */
	/* this path performs 25 div1 steps in total. */
60	div1 r5,r0
61
/* Table-driven division for divisors of at most 128: multiply the */
/* dividend by a tabulated inverse and shift the result. */
/* div_le128: entry from the signed path (pos_result); r4 and r5 have */
/* already been pushed there. */
62div_le128:
63	mova div_table_ix,r0
64	bra div_le128_2
	/* Delay slot: r1 = div_table_ix[divisor], a signed byte offset. */
65	mov.b @(r0,r5),r1
/* udiv_le128: entry from __udivsi3_i4i; saves r4 and r5 itself. */
66udiv_le128:
67	mov.l r4,@-r15
68	mova div_table_ix,r0
	/* r1 = div_table_ix[divisor], a signed byte offset. */
69	mov.b @(r0,r5),r1
70	mov.l r5,@-r15
71div_le128_2:
	/* r1 = 32-bit inverse at div_table_inv + offset.  For divisor 0 the */
	/* offset is -6, so this load is misaligned (see the comment above */
	/* div_table_ix). */
72	mova div_table_inv,r0
73	mov.l @(r0,r1),r1
	/* T = ((divisor and 0xfe) is zero), i.e. the divisor is 0 or 1. */
74	mov r5,r0
75	tst #0xfe,r0
76	mova div_table_clz,r0
	/* MACH:MACL = inverse * dividend (unsigned 64-bit product). */
77	dmulu.l r1,r4
	/* r1 = div_table_clz[divisor], the final shift count. */
78	mov.b @(r0,r5),r1
79	bt/s div_by_1
	/* Delay slot: r0 = dividend, which is the result when dividing by 1. */
80	mov r4,r0
	/* Restore the caller's r5. */
81	mov.l @r15+,r5
	/* r0 = high word of the product. */
82	sts mach,r0
83	/* clrt */
	/* T is already clear here because the bt/s above was not taken. */
	/* Adding the dividend accounts for the implicit leading 1 of the */
	/* tabulated inverse; the carry out of the addition lands in T. */
84	addc r4,r0
	/* Restore the caller's r4. */
85	mov.l @r15+,r4
	/* Shift the 33-bit sum T:r0 right by one. */
86	rotcr r0
87	rts
	/* Delay slot: apply the table shift (negative counts shift right). */
88	shld r1,r0
89
/* Common exit for a divisor whose magnitude is 1: the quotient is the */
/* dividend itself.  Both entries restore the caller's r5 and r4. */
/* div_by_1_neg: entered from div_le128_neg; the result is negated. */
90div_by_1_neg:
91	neg r4,r0
/* div_by_1: entered with r0 already set to the dividend by the delay */
/* slot of the branch in div_le128_2. */
92div_by_1:
93	mov.l @r15+,r5
94	rts
	/* Delay slot: restore the caller's r4. */
95	mov.l @r15+,r4
96
/* Divisor of 64K or more.  On entry r0 = dividend shifted right by 8. */
/* If the divisor is above that value the quotient fits in 8 bits and */
/* the div_r8 code is used; otherwise 16 div1 steps are performed, 8 */
/* here and 8 at div_ge64k_end. */
/* div_ge64k: entry from the signed path (pos_result); r4 and r5 are */
/* already pushed and T = (divisor is above r0) from the delay slot of */
/* the branch that came here. */
97div_ge64k:
98	bt/s div_r8
	/* Delay slot: clear Q, M and T for the unsigned div1 steps. */
99	div0u
	/* r5 = divisor shifted left by 8 for the div1 steps. */
100	shll8 r5
101	bra div_ge64k_2
	/* Delay slot: first div1 step on this path. */
102	div1 r5,r0
/* udiv_ge64k: entry from __udivsi3_i4i; nothing has been pushed yet and */
/* Q and M were cleared by the div0u at the function entry. */
103udiv_ge64k:
	/* T = (divisor is above dividend shifted right by 8). */
104	cmp/hi r0,r5
	/* Keep the unshifted divisor in r1 so it can be saved on the stack. */
105	mov r5,r1
106	bt udiv_r8
	/* Not taken, so T is 0 going into the first div1 below. */
107	shll8 r5
108	mov.l r4,@-r15
109	div1 r5,r0
110	mov.l r1,@-r15
111div_ge64k_2:
112	div1 r5,r0
	/* r1 = 0, pushed below as a scratch word for assembling the quotient. */
113	mov.l zero_l,r1
114	.rept 4
115	div1 r5,r0
116	.endr
117	mov.l r1,@-r15
118	div1 r5,r0
	/* r1 = 0xffffff00 (mov.w sign-extends the 16-bit constant 0xff00). */
119	mov.w m256_w,r1
120	div1 r5,r0
	/* Eight div1 steps are done.  div1 shifts quotient bits into the low */
	/* end of r0; park the low byte of r0 in bits 8..15 of the scratch word. */
121	mov.b r0,@(L_LSWMSB,r15)
	/* xor / and / xor: keep the upper 24 bits of r0 and replace its low */
	/* byte with the low byte of the dividend (r4). */
122	xor r4,r0
123	and r1,r0
	/* Continue with the final 8 div1 steps and the common exit. */
124	bra div_ge64k_end
	/* Delay slot: last instruction of the merge sequence. */
125	xor r4,r0
126
/* Quotient fits in 8 bits: the divisor is above the dividend shifted */
/* right by 8, which is in r0 on entry.  Eight div1 steps are performed. */
/* div_r8: entry from div_ge64k (signed path); r4 and r5 are already */
/* pushed and T was cleared by the div0u in the branch delay slot. */
127div_r8:
	/* r4 = dividend shifted left by 24 (its low byte moved to the top). */
128	shll16 r4
129	bra div_r8_2
	/* Delay slot: second half of the 24-bit shift. */
130	shll8 r4
/* udiv_r8: entry from udiv_ge64k; saves r4 and r5 itself.  T is set on */
/* entry (the branch here was taken on T), hence the explicit clrt. */
131udiv_r8:
132	mov.l r4,@-r15
133	shll16 r4
134	clrt
135	shll8 r4
136	mov.l r5,@-r15
137div_r8_2:
	/* rotcl moves the top bit of its operand into T and the old T into */
	/* the bottom; div1 shifts T into the partial remainder r1, does one */
	/* division step and leaves the resulting quotient bit in T.  The */
	/* alternating rotcl / div1 pairs therefore feed the dividend's low */
	/* byte in from the top of r4/r0 while collecting quotient bits at */
	/* the bottom of r0. */
138	rotcl r4
	/* r1 = partial remainder, initially dividend shifted right by 8. */
139	mov r0,r1
140	div1 r5,r1
141	mov r4,r0
142	rotcl r0
	/* Keep a copy of the shifted-free divisor in r4 so that r5 can be */
	/* restored before the last div1. */
143	mov r5,r4
144	div1 r5,r1
145	.rept 5
146	rotcl r0; div1 r5,r1
147	.endr
148	rotcl r0
	/* Restore the caller's r5; the last step uses the copy in r4. */
149	mov.l @r15+,r5
150	div1 r4,r1
	/* Restore the caller's r4. */
151	mov.l @r15+,r4
152	rts
	/* Delay slot: shift the final quotient bit from T into r0. */
153	rotcl r0
154
/* __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): signed 32-bit division. */
/* In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient. */
/* The original r4 and r5 are pushed on the stack and popped again before */
/* returning.  Negative operands are negated, the division is carried out */
/* with the unsigned step sequence, and the paths reached through */
/* neg_result negate the quotient at the end. */
155	.global	__sdivsi3_i4i
156	.global __sdivsi3_i4
157	.global	__sdivsi3
158	.set	__sdivsi3_i4, __sdivsi3_i4i
159	.set	__sdivsi3, __sdivsi3_i4i
160	.type	__sdivsi3_i4i, @function
161	/* This is link-compatible with a __sdivsi3 call,
162	   but we effectively clobber only r1.  */
163__sdivsi3_i4i:
	/* Save the caller's dividend. */
164	mov.l r4,@-r15
	/* T = (divisor is zero or positive). */
165	cmp/pz r5
	/* r1 = 128, the largest divisor handled by the lookup tables. */
166	mov.w c128_w, r1
167	bt/s pos_divisor
	/* Delay slot: T = (dividend is zero or positive). */
168	cmp/pz r4
	/* Divisor is negative: save it, then negate it. */
169	mov.l r5,@-r15
170	neg r5,r5
	/* Dividend not negative, divisor negative: negative result. */
171	bt/s neg_result
	/* Delay slot: T = (negated divisor is above 128), unsigned compare. */
172	cmp/hi r1,r5
	/* Both operands negative: negate the dividend too; result is positive. */
173	neg r4,r4
/* pos_result: r4 and r5 hold the sign-stripped operands, their originals */
/* are on the stack, and T = (r5 is above 128). */
174pos_result:
	/* r0 = low 16 bits of the divisor; T is left untouched. */
175	extu.w r5,r0
176	bf div_le128
	/* T = (divisor equals its own low 16 bits), i.e. it is below 64K. */
177	cmp/eq r5,r0
	/* r0 = dividend shifted right by 8. */
178	mov r4,r0
179	shlr8 r0
180	bf/s div_ge64k
	/* Delay slot: T = (divisor is above r0), tested at div_ge64k. */
181	cmp/hi r0,r5
	/* Mid-range divisor (above 128, below 64K). */
	/* Clear Q, M and T for the unsigned div1 steps. */
182	div0u
	/* r5 = divisor shifted left by 16 for the div1 steps. */
183	shll16 r5
184	div1 r5,r0
185	div1 r5,r0
186	div1 r5,r0
/* udiv_25: also the continuation of the unsigned mid-range path, which */
/* arrives here after 4 div1 steps (25 in total on that path); the signed */
/* path arrives after 3. */
187udiv_25:
	/* r1 = 0, pushed below as a scratch word for assembling the quotient. */
188	mov.l zero_l,r1
189	div1 r5,r0
190	div1 r5,r0
191	mov.l r1,@-r15
192	.rept 3
193	div1 r5,r0
194	.endr
	/* div1 shifts quotient bits into the low end of r0; park the low */
	/* byte of r0 in bits 16..23 of the scratch word. */
195	mov.b r0,@(L_MSWLSB,r15)
	/* xtrct + swap.w: keep the upper 16 bits of r0 and replace its low */
	/* 16 bits with the low 16 bits of the dividend (r4). */
196	xtrct r4,r0
197	swap.w r0,r0
198	.rept 8
199	div1 r5,r0
200	.endr
	/* Park the next quotient byte in bits 8..15 of the scratch word. */
201	mov.b r0,@(L_LSWMSB,r15)
/* div_ge64k_end: common tail, also reached from div_ge64k_2. */
202div_ge64k_end:
203	.rept 8
204	div1 r5,r0
205	.endr
	/* Pop the scratch word holding the parked quotient bytes into r4. */
206	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
	/* Keep only the low byte of r0 (the most recent quotient bits). */
207	extu.b r0,r0
	/* Restore the caller's r5. */
208	mov.l @r15+,r5
	/* Combine the parked bytes with the low byte. */
209	or r4,r0
	/* Restore the caller's r4. */
210	mov.l @r15+,r4
211	rts
	/* Delay slot: shift the final quotient bit from T into r0. */
212	rotcl r0
213
/* Table-driven division for divisors of at most 128 when the result */
/* must be negated.  Entered from neg_result with r0 = low 16 bits of the */
/* sign-stripped divisor, r4 and r5 sign-stripped, originals on the stack. */
214div_le128_neg:
	/* T = ((divisor and 0xfe) is zero), i.e. the divisor is 0 or 1. */
215	tst #0xfe,r0
216	mova div_table_ix,r0
	/* r1 = div_table_ix[divisor], a signed byte offset. */
217	mov.b @(r0,r5),r1
218	mova div_table_inv,r0
219	bt/s div_by_1_neg
	/* Delay slot: r1 = 32-bit inverse at div_table_inv + offset.  For */
	/* divisor 0 the offset is -6, so this load is misaligned (see the */
	/* comment above div_table_ix). */
220	mov.l @(r0,r1),r1
221	mova div_table_clz,r0
	/* MACH:MACL = inverse * dividend (unsigned 64-bit product). */
222	dmulu.l r1,r4
	/* r1 = div_table_clz[divisor], the final shift count. */
223	mov.b @(r0,r5),r1
	/* Restore the caller's r5. */
224	mov.l @r15+,r5
	/* r0 = high word of the product. */
225	sts mach,r0
226	/* clrt */
	/* T is already clear here because the bt/s above was not taken. */
	/* Adding the dividend accounts for the implicit leading 1 of the */
	/* tabulated inverse; the carry out of the addition lands in T. */
227	addc r4,r0
	/* Restore the caller's r4. */
228	mov.l @r15+,r4
	/* Shift the 33-bit sum T:r0 right by one. */
229	rotcr r0
	/* Apply the table shift (negative counts shift right). */
230	shld r1,r0
231	rts
	/* Delay slot: negate the quotient. */
232	neg r0,r0
233
/* pos_divisor: continuation of __sdivsi3_i4i when the divisor is zero or */
/* positive.  On entry T = (dividend is zero or positive), r1 = 128 and */
/* the original dividend is already on the stack. */
234pos_divisor:
	/* Save the caller's divisor. */
235	mov.l r5,@-r15
	/* Dividend not negative either: positive result. */
236	bt/s pos_result
	/* Delay slot: T = (divisor is above 128), unsigned compare. */
237	cmp/hi r1,r5
	/* Dividend is negative: negate it; the result will be negated. */
238	neg r4,r4
/* neg_result: operand signs differ.  r4 and r5 hold the sign-stripped */
/* operands, their originals are on the stack, and T = (r5 is above 128). */
239neg_result:
	/* r0 = low 16 bits of the divisor; T is left untouched. */
240	extu.w r5,r0
241	bf div_le128_neg
	/* T = (divisor equals its own low 16 bits), i.e. it is below 64K. */
242	cmp/eq r5,r0
	/* r0 = dividend shifted right by 8. */
243	mov r4,r0
244	shlr8 r0
245	bf/s div_ge64k_neg
	/* Delay slot: T = (divisor is above r0), tested at div_ge64k_neg. */
246	cmp/hi r0,r5
	/* Mid-range divisor (above 128, below 64K). */
	/* Clear Q, M and T for the unsigned div1 steps. */
247	div0u
	/* r1 = 0, pushed below as a scratch word for assembling the quotient. */
248	mov.l zero_l,r1
	/* r5 = divisor shifted left by 16 for the div1 steps. */
249	shll16 r5
250	div1 r5,r0
251	mov.l r1,@-r15
252	.rept 7
253	div1 r5,r0
254	.endr
	/* div1 shifts quotient bits into the low end of r0; park the low */
	/* byte of r0 in bits 16..23 of the scratch word. */
255	mov.b r0,@(L_MSWLSB,r15)
	/* xtrct + swap.w: keep the upper 16 bits of r0 and replace its low */
	/* 16 bits with the low 16 bits of the dividend (r4). */
256	xtrct r4,r0
257	swap.w r0,r0
258	.rept 8
259	div1 r5,r0
260	.endr
	/* Park the next quotient byte in bits 8..15 of the scratch word. */
261	mov.b r0,@(L_LSWMSB,r15)
/* div_ge64k_neg_end: common tail, also reached from div_ge64k_neg. */
262div_ge64k_neg_end:
263	.rept 8
264	div1 r5,r0
265	.endr
	/* Pop the scratch word holding the parked quotient bytes into r4. */
266	mov.l @r15+,r4 ! zero-extension and swap using LS unit.
	/* r1 = low byte of r0 (the most recent quotient bits). */
267	extu.b r0,r1
	/* Restore the caller's r5. */
268	mov.l @r15+,r5
	/* Combine the parked bytes with the low byte. */
269	or r4,r1
/* div_r8_neg_end: also reached from div_r8_neg with the quotient bits */
/* in r1, the last one still in T, and only r4 left to restore. */
270div_r8_neg_end:
	/* Restore the caller's r4. */
271	mov.l @r15+,r4
	/* Shift the final quotient bit from T into r1. */
272	rotcl r1
273	rts
	/* Delay slot: r0 = negated quotient. */
274	neg r1,r0
275
/* Divisor of 64K or more on the negated-result path.  Entered from */
/* neg_result with r0 = dividend shifted right by 8, r4 and r5 */
/* sign-stripped and T = (divisor is above r0). */
276div_ge64k_neg:
	/* Quotient fits in 8 bits when the divisor is above r0. */
277	bt/s div_r8_neg
	/* Delay slot: clear Q, M and T for the unsigned div1 steps. */
278	div0u
	/* r5 = divisor shifted left by 8 for the div1 steps. */
279	shll8 r5
	/* r1 = 0, pushed below as a scratch word for assembling the quotient. */
280	mov.l zero_l,r1
281	.rept 6
282	div1 r5,r0
283	.endr
284	mov.l r1,@-r15
285	div1 r5,r0
	/* r1 = 0xffffff00 (mov.w sign-extends the 16-bit constant 0xff00). */
286	mov.w m256_w,r1
287	div1 r5,r0
	/* Eight div1 steps are done.  div1 shifts quotient bits into the low */
	/* end of r0; park the low byte of r0 in bits 8..15 of the scratch word. */
288	mov.b r0,@(L_LSWMSB,r15)
	/* xor / and / xor: keep the upper 24 bits of r0 and replace its low */
	/* byte with the low byte of the dividend (r4). */
289	xor r4,r0
290	and r1,r0
	/* Continue with the final 8 div1 steps and the negating exit. */
291	bra div_ge64k_neg_end
	/* Delay slot: last instruction of the merge sequence. */
292	xor r4,r0
293
/* 16-bit constant 128, loaded with mov.w by both entry points and */
/* compared against the divisor to select the table-driven path. */
/* It sits between code fragments: the code before it ends with a bra */
/* and its delay slot, and the code after it is reached only by a branch. */
/* NOTE(review): presumably placed here to stay within mov.w PC-relative */
/* range of both entry points - confirm before moving it. */
294c128_w:
295	.word 128
296
/* Quotient fits in 8 bits on the negated-result path.  Entered from */
/* div_ge64k_neg with r0 = dividend shifted right by 8 (used here as the */
/* partial remainder), Q and M clear, and r4/r5 sign-stripped with their */
/* originals on the stack.  Eight div1 steps are performed. */
297div_r8_neg:
298	clrt
	/* r1 = dividend shifted left by 24 (its low byte moved to the top); */
	/* r1 also collects the quotient bits at its low end. */
299	shll16 r4
300	mov r4,r1
301	shll8 r1
	/* Keep a copy of the divisor in r4 so that r5 can be restored */
	/* before the last div1. */
302	mov r5,r4
	/* rotcl moves the next dividend bit into T and the previous T into */
	/* the bottom of r1; div1 shifts T into r0, does one division step */
	/* and leaves the resulting quotient bit in T. */
303	.rept 7
304	rotcl r1; div1 r5,r0
305	.endr
	/* Restore the caller's r5; the last step uses the copy in r4. */
306	mov.l @r15+,r5
307	rotcl r1
	/* The shared exit restores r4, shifts in the last quotient bit and */
	/* negates the result. */
308	bra div_r8_neg_end
	/* Delay slot: eighth div1 step. */
309	div1 r4,r0
310
/* 16-bit constant 0xff00.  mov.w sign-extends it to 0xffffff00, the mask */
/* used at div_ge64k_2 and div_ge64k_neg to keep the upper 24 bits of r0 */
/* while its low byte is replaced by the low byte of the dividend. */
311m256_w:
312	.word 0xff00
313/* This table has been generated by divtab-sh4.c.  */
/* div_table_clz: one signed byte per divisor value, read with */
/* mov.b @(r0,r5) and used as the shld count applied after the multiply */
/* by the tabulated inverse (negative counts shift right). */
/* 128 bytes are listed here, for indices 0..127.  The entry for index */
/* 128 is the first byte of div_table_ix, which follows immediately, so */
/* nothing may be inserted between the two tables. */
314	.balign 4
315div_table_clz:
316	.byte	0
317	.byte	1
318	.byte	0
319	.byte	-1
320	.byte	-1
321	.byte	-2
322	.byte	-2
323	.byte	-2
324	.byte	-2
325	.byte	-3
326	.byte	-3
327	.byte	-3
328	.byte	-3
329	.byte	-3
330	.byte	-3
331	.byte	-3
332	.byte	-3
333	.byte	-4
334	.byte	-4
335	.byte	-4
336	.byte	-4
337	.byte	-4
338	.byte	-4
339	.byte	-4
340	.byte	-4
341	.byte	-4
342	.byte	-4
343	.byte	-4
344	.byte	-4
345	.byte	-4
346	.byte	-4
347	.byte	-4
348	.byte	-4
349	.byte	-5
350	.byte	-5
351	.byte	-5
352	.byte	-5
353	.byte	-5
354	.byte	-5
355	.byte	-5
356	.byte	-5
357	.byte	-5
358	.byte	-5
359	.byte	-5
360	.byte	-5
361	.byte	-5
362	.byte	-5
363	.byte	-5
364	.byte	-5
365	.byte	-5
366	.byte	-5
367	.byte	-5
368	.byte	-5
369	.byte	-5
370	.byte	-5
371	.byte	-5
372	.byte	-5
373	.byte	-5
374	.byte	-5
375	.byte	-5
376	.byte	-5
377	.byte	-5
378	.byte	-5
379	.byte	-5
380	.byte	-5
381	.byte	-6
382	.byte	-6
383	.byte	-6
384	.byte	-6
385	.byte	-6
386	.byte	-6
387	.byte	-6
388	.byte	-6
389	.byte	-6
390	.byte	-6
391	.byte	-6
392	.byte	-6
393	.byte	-6
394	.byte	-6
395	.byte	-6
396	.byte	-6
397	.byte	-6
398	.byte	-6
399	.byte	-6
400	.byte	-6
401	.byte	-6
402	.byte	-6
403	.byte	-6
404	.byte	-6
405	.byte	-6
406	.byte	-6
407	.byte	-6
408	.byte	-6
409	.byte	-6
410	.byte	-6
411	.byte	-6
412	.byte	-6
413	.byte	-6
414	.byte	-6
415	.byte	-6
416	.byte	-6
417	.byte	-6
418	.byte	-6
419	.byte	-6
420	.byte	-6
421	.byte	-6
422	.byte	-6
423	.byte	-6
424	.byte	-6
425	.byte	-6
426	.byte	-6
427	.byte	-6
428	.byte	-6
429	.byte	-6
430	.byte	-6
431	.byte	-6
432	.byte	-6
433	.byte	-6
434	.byte	-6
435	.byte	-6
436	.byte	-6
437	.byte	-6
438	.byte	-6
439	.byte	-6
440	.byte	-6
441	.byte	-6
442	.byte	-6
443	.byte	-6
444/* Lookup table translating positive divisor to index into table of
445   normalized inverse.  N.B. the '0' entry is also the last entry of the
446 previous table, and causes an unaligned access for division by zero.  */
/* One signed byte per divisor value 0..128, read with mov.b @(r0,r5). */
/* Each value is a byte offset that is added to div_table_inv to select */
/* a 32-bit inverse; apart from entry 0 the offsets are multiples of 4 */
/* in the range -128 .. 124.  Offset -128 selects zero_l and is used for */
/* the power-of-two divisors 1, 2, 4, ... 128. */
447div_table_ix:
448	.byte	-6
449	.byte	-128
450	.byte	-128
451	.byte	0
452	.byte	-128
453	.byte	-64
454	.byte	0
455	.byte	64
456	.byte	-128
457	.byte	-96
458	.byte	-64
459	.byte	-32
460	.byte	0
461	.byte	32
462	.byte	64
463	.byte	96
464	.byte	-128
465	.byte	-112
466	.byte	-96
467	.byte	-80
468	.byte	-64
469	.byte	-48
470	.byte	-32
471	.byte	-16
472	.byte	0
473	.byte	16
474	.byte	32
475	.byte	48
476	.byte	64
477	.byte	80
478	.byte	96
479	.byte	112
480	.byte	-128
481	.byte	-120
482	.byte	-112
483	.byte	-104
484	.byte	-96
485	.byte	-88
486	.byte	-80
487	.byte	-72
488	.byte	-64
489	.byte	-56
490	.byte	-48
491	.byte	-40
492	.byte	-32
493	.byte	-24
494	.byte	-16
495	.byte	-8
496	.byte	0
497	.byte	8
498	.byte	16
499	.byte	24
500	.byte	32
501	.byte	40
502	.byte	48
503	.byte	56
504	.byte	64
505	.byte	72
506	.byte	80
507	.byte	88
508	.byte	96
509	.byte	104
510	.byte	112
511	.byte	120
512	.byte	-128
513	.byte	-124
514	.byte	-120
515	.byte	-116
516	.byte	-112
517	.byte	-108
518	.byte	-104
519	.byte	-100
520	.byte	-96
521	.byte	-92
522	.byte	-88
523	.byte	-84
524	.byte	-80
525	.byte	-76
526	.byte	-72
527	.byte	-68
528	.byte	-64
529	.byte	-60
530	.byte	-56
531	.byte	-52
532	.byte	-48
533	.byte	-44
534	.byte	-40
535	.byte	-36
536	.byte	-32
537	.byte	-28
538	.byte	-24
539	.byte	-20
540	.byte	-16
541	.byte	-12
542	.byte	-8
543	.byte	-4
544	.byte	0
545	.byte	4
546	.byte	8
547	.byte	12
548	.byte	16
549	.byte	20
550	.byte	24
551	.byte	28
552	.byte	32
553	.byte	36
554	.byte	40
555	.byte	44
556	.byte	48
557	.byte	52
558	.byte	56
559	.byte	60
560	.byte	64
561	.byte	68
562	.byte	72
563	.byte	76
564	.byte	80
565	.byte	84
566	.byte	88
567	.byte	92
568	.byte	96
569	.byte	100
570	.byte	104
571	.byte	108
572	.byte	112
573	.byte	116
574	.byte	120
575	.byte	124
576	.byte	-128
577/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
/* A single table of 64 longwords carrying two labels.  div_table_inv */
/* marks its middle: 32 entries precede it and 32 follow, so the signed */
/* byte offsets from div_table_ix (-128 .. 124) reach every entry. */
/* zero_l, the first entry, has a second use: it is the literal 0 that */
/* the div1 paths load with mov.l zero_l,r1 before pushing a scratch word. */
578	.balign 4
579zero_l:
580	.long	0x0
581	.long	0xF81F81F9
582	.long	0xF07C1F08
583	.long	0xE9131AC0
584	.long	0xE1E1E1E2
585	.long	0xDAE6076C
586	.long	0xD41D41D5
587	.long	0xCD856891
588	.long	0xC71C71C8
589	.long	0xC0E07039
590	.long	0xBACF914D
591	.long	0xB4E81B4F
592	.long	0xAF286BCB
593	.long	0xA98EF607
594	.long	0xA41A41A5
595	.long	0x9EC8E952
596	.long	0x9999999A
597	.long	0x948B0FCE
598	.long	0x8F9C18FA
599	.long	0x8ACB90F7
600	.long	0x86186187
601	.long	0x81818182
602	.long	0x7D05F418
603	.long	0x78A4C818
604	.long	0x745D1746
605	.long	0x702E05C1
606	.long	0x6C16C16D
607	.long	0x68168169
608	.long	0x642C8591
609	.long	0x60581606
610	.long	0x5C9882BA
611	.long	0x58ED2309
612div_table_inv:
613	.long	0x55555556
614	.long	0x51D07EAF
615	.long	0x4E5E0A73
616	.long	0x4AFD6A06
617	.long	0x47AE147B
618	.long	0x446F8657
619	.long	0x41414142
620	.long	0x3E22CBCF
621	.long	0x3B13B13C
622	.long	0x38138139
623	.long	0x3521CFB3
624	.long	0x323E34A3
625	.long	0x2F684BDB
626	.long	0x2C9FB4D9
627	.long	0x29E4129F
628	.long	0x27350B89
629	.long	0x24924925
630	.long	0x21FB7813
631	.long	0x1F7047DD
632	.long	0x1CF06ADB
633	.long	0x1A7B9612
634	.long	0x18118119
635	.long	0x15B1E5F8
636	.long	0x135C8114
637	.long	0x11111112
638	.long	0xECF56BF
639	.long	0xC9714FC
640	.long	0xA6810A7
641	.long	0x8421085
642	.long	0x624DD30
643	.long	0x4104105
644	.long	0x2040811
645	/* maximum error: 0.987342 scaled: 0.921875*/
646