xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/arm/udivmodsi4.S (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric//===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
2*0b57cec5SDimitry Andric//
3*0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric//
7*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric//
9*0b57cec5SDimitry Andric// This file implements the __udivmodsi4 (32-bit unsigned integer divide and
10*0b57cec5SDimitry Andric// modulus) function for the ARM 32-bit architecture.
11*0b57cec5SDimitry Andric//
12*0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric
14*0b57cec5SDimitry Andric#include "../assembly.h"
15*0b57cec5SDimitry Andric
16*0b57cec5SDimitry Andric	.syntax unified
17*0b57cec5SDimitry Andric	.text
18*0b57cec5SDimitry Andric	DEFINE_CODE_STATE
19*0b57cec5SDimitry Andric
20*0b57cec5SDimitry Andric@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
21*0b57cec5SDimitry Andric@                           unsigned int *remainder)
22*0b57cec5SDimitry Andric@   Calculate the quotient and remainder of the (unsigned) division.  The return
23*0b57cec5SDimitry Andric@   value is the quotient; the remainder is stored through the remainder pointer.
24*0b57cec5SDimitry Andric
25*0b57cec5SDimitry Andric	.p2align 2
26*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
	// In:  r0 = dividend, r1 = divisor, r2 = address that receives the remainder.
	// Out: r0 = quotient. The remainder is stored to [r2] on every path except
	//      the divide-by-zero path, which never writes through r2.
	// Scratch: r3 and ip. r4 is used only by the non-CLZ path, which saves it
	//      below sp and restores it before jumping into the division ladder.
27*0b57cec5SDimitry Andric#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide path: reject a zero divisor, then use one udiv for the
	// quotient and one mls for the remainder.
28*0b57cec5SDimitry Andric	tst     r1, r1
29*0b57cec5SDimitry Andric	beq     LOCAL_LABEL(divby0)
	// Keep the dividend in r3 because udiv overwrites r0 with the quotient.
30*0b57cec5SDimitry Andric	mov 	r3, r0
31*0b57cec5SDimitry Andric	udiv	r0, r3, r1
	// r1 = r3 - r0 * r1, i.e. dividend - quotient * divisor = remainder.
32*0b57cec5SDimitry Andric	mls 	r1, r0, r1, r3
33*0b57cec5SDimitry Andric	str 	r1, [r2]
34*0b57cec5SDimitry Andric	bx  	lr
35*0b57cec5SDimitry Andric#else
	// Software path. Peel off the trivial cases first:
	//   r1 <  1 (unsigned, so r1 == 0) -> divby0
	//   r1 == 1                        -> divby1
	//   r0 <  r1                       -> quotient0
	// Both branches after the first cmp reuse its flags.
36*0b57cec5SDimitry Andric	cmp	r1, #1
37*0b57cec5SDimitry Andric	bcc	LOCAL_LABEL(divby0)
38*0b57cec5SDimitry Andric	beq	LOCAL_LABEL(divby1)
39*0b57cec5SDimitry Andric	cmp	r0, r1
40*0b57cec5SDimitry Andric	bcc	LOCAL_LABEL(quotient0)
41*0b57cec5SDimitry Andric
42*0b57cec5SDimitry Andric	// Implement division using binary long division algorithm.
43*0b57cec5SDimitry Andric	//
44*0b57cec5SDimitry Andric	// r0 is the numerator, r1 the denominator.
45*0b57cec5SDimitry Andric	//
46*0b57cec5SDimitry Andric	// The code before JMP computes the correct shift I, so that
47*0b57cec5SDimitry Andric	// r0 and (r1 << I) have the highest bit set in the same position.
48*0b57cec5SDimitry Andric	// At the time of JMP, ip := .Ldiv0block - 12 * I (14 * I in THUMB mode).
49*0b57cec5SDimitry Andric	// This depends on the fixed instruction size of block.
50*0b57cec5SDimitry Andric	// For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
51*0b57cec5SDimitry Andric	//
52*0b57cec5SDimitry Andric	// block(shift) implements the test-and-update-quotient core.
53*0b57cec5SDimitry Andric	// It assumes (r1 << shift) can be computed without overflow and
54*0b57cec5SDimitry Andric	// that r0 < 2 * (r1 << shift). The quotient is stored in r3.
55*0b57cec5SDimitry Andric
56*0b57cec5SDimitry Andric#  ifdef __ARM_FEATURE_CLZ
	// With CLZ the shift is simply the difference of the leading-zero counts.
57*0b57cec5SDimitry Andric	clz	ip, r0
58*0b57cec5SDimitry Andric	clz	r3, r1
59*0b57cec5SDimitry Andric	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
60*0b57cec5SDimitry Andric	sub	r3, r3, ip
	// r3 = I. Next, ip = address of div0block minus I block sizes.
61*0b57cec5SDimitry Andric#    if defined(USE_THUMB_2)
	// The + 1 sets bit 0 of the address handed to bx, which selects Thumb
	// state at the branch target. The extra 2 * I here plus the 4 * I and
	// 8 * I below add up to the 14-byte Thumb block size.
62*0b57cec5SDimitry Andric	adr	ip, LOCAL_LABEL(div0block) + 1
63*0b57cec5SDimitry Andric	sub	ip, ip, r3, lsl #1
64*0b57cec5SDimitry Andric#    else
65*0b57cec5SDimitry Andric	adr	ip, LOCAL_LABEL(div0block)
66*0b57cec5SDimitry Andric#    endif
	// ip -= 4 * I, then ip -= 8 * I: 12 * I in total (the ARM block size).
67*0b57cec5SDimitry Andric	sub	ip, ip, r3, lsl #2
68*0b57cec5SDimitry Andric	sub	ip, ip, r3, lsl #3
	// r3 is done as the shift count; reuse it as the quotient accumulator.
69*0b57cec5SDimitry Andric	mov	r3, #0
70*0b57cec5SDimitry Andric	bx	ip
71*0b57cec5SDimitry Andric#  else
72*0b57cec5SDimitry Andric#    if defined(USE_THUMB_2)
73*0b57cec5SDimitry Andric#    error THUMB mode requires CLZ or UDIV
74*0b57cec5SDimitry Andric#    endif
	// No CLZ: find the shift by binary search. r4 holds a working copy of the
	// numerator; it is saved with an 8-byte pre-decrement of sp and restored
	// with the matching post-increment before the jump.
75*0b57cec5SDimitry Andric	str	r4, [sp, #-8]!
76*0b57cec5SDimitry Andric
77*0b57cec5SDimitry Andric	mov	r4, r0
78*0b57cec5SDimitry Andric	adr	ip, LOCAL_LABEL(div0block)
79*0b57cec5SDimitry Andric
	// Each step below tests whether (r4 >> k) >= r1 for k = 16, 8, 4, 2.
	// If so, the shifted value is kept in r4 and the entry point moves
	// k blocks (k * 12 bytes) earlier; otherwise r4 and ip are unchanged.
80*0b57cec5SDimitry Andric	lsr	r3, r4, #16
81*0b57cec5SDimitry Andric	cmp	r3, r1
82*0b57cec5SDimitry Andric	movhs	r4, r3
83*0b57cec5SDimitry Andric	subhs	ip, ip, #(16 * 12)
84*0b57cec5SDimitry Andric
85*0b57cec5SDimitry Andric	lsr	r3, r4, #8
86*0b57cec5SDimitry Andric	cmp	r3, r1
87*0b57cec5SDimitry Andric	movhs	r4, r3
88*0b57cec5SDimitry Andric	subhs	ip, ip, #(8 * 12)
89*0b57cec5SDimitry Andric
90*0b57cec5SDimitry Andric	lsr	r3, r4, #4
91*0b57cec5SDimitry Andric	cmp	r3, r1
92*0b57cec5SDimitry Andric	movhs	r4, r3
93*0b57cec5SDimitry Andric	subhs	ip, #(4 * 12)
94*0b57cec5SDimitry Andric
95*0b57cec5SDimitry Andric	lsr	r3, r4, #2
96*0b57cec5SDimitry Andric	cmp	r3, r1
97*0b57cec5SDimitry Andric	movhs	r4, r3
98*0b57cec5SDimitry Andric	subhs	ip, ip, #(2 * 12)
99*0b57cec5SDimitry Andric
100*0b57cec5SDimitry Andric	// Last block, no need to update r3 or r4.
	// If r1 <= (r4 >> 1), move the entry point one more block earlier.
101*0b57cec5SDimitry Andric	cmp	r1, r4, lsr #1
102*0b57cec5SDimitry Andric	subls	ip, ip, #(1 * 12)
103*0b57cec5SDimitry Andric
104*0b57cec5SDimitry Andric	ldr	r4, [sp], #8	// restore r4, we are done with it.
	// r3 becomes the quotient accumulator for the block ladder.
105*0b57cec5SDimitry Andric	mov	r3, #0
106*0b57cec5SDimitry Andric
	// JMP is a macro from ../assembly.h (not shown here); it transfers control
	// to the computed entry point in ip.
107*0b57cec5SDimitry Andric	JMP(ip)
108*0b57cec5SDimitry Andric#  endif
109*0b57cec5SDimitry Andric
	// IMM expands to the immediate marker. Presumably it is needed because the
	// marker cannot be written literally inside the function-like macro below
	// (the preprocessor would treat it as its stringify operator) -- confirm.
110*0b57cec5SDimitry Andric#define	IMM	#
111*0b57cec5SDimitry Andric
	// One long-division step: if r0 >= (r1 << shift), set bit `shift` of the
	// quotient in r3 and subtract (r1 << shift) from r0. ITT and WIDE come from
	// ../assembly.h; presumably they supply the IT prefix and the fixed-width
	// encodings that keep every block the same size in Thumb-2 -- confirm there.
112*0b57cec5SDimitry Andric#define block(shift)                                                           \
113*0b57cec5SDimitry Andric	cmp	r0, r1, lsl IMM shift;                                         \
114*0b57cec5SDimitry Andric	ITT(hs);                                                               \
115*0b57cec5SDimitry Andric	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
116*0b57cec5SDimitry Andric	WIDE(subhs)	r0, r0, r1, lsl IMM shift
117*0b57cec5SDimitry Andric
	// Unrolled ladder, highest shift first. Execution enters at block(I) via
	// the computed jump above and falls through down to block(0). Nothing may
	// be inserted between blocks: the entry address is computed from the
	// fixed block size.
118*0b57cec5SDimitry Andric	block(31)
119*0b57cec5SDimitry Andric	block(30)
120*0b57cec5SDimitry Andric	block(29)
121*0b57cec5SDimitry Andric	block(28)
122*0b57cec5SDimitry Andric	block(27)
123*0b57cec5SDimitry Andric	block(26)
124*0b57cec5SDimitry Andric	block(25)
125*0b57cec5SDimitry Andric	block(24)
126*0b57cec5SDimitry Andric	block(23)
127*0b57cec5SDimitry Andric	block(22)
128*0b57cec5SDimitry Andric	block(21)
129*0b57cec5SDimitry Andric	block(20)
130*0b57cec5SDimitry Andric	block(19)
131*0b57cec5SDimitry Andric	block(18)
132*0b57cec5SDimitry Andric	block(17)
133*0b57cec5SDimitry Andric	block(16)
134*0b57cec5SDimitry Andric	block(15)
135*0b57cec5SDimitry Andric	block(14)
136*0b57cec5SDimitry Andric	block(13)
137*0b57cec5SDimitry Andric	block(12)
138*0b57cec5SDimitry Andric	block(11)
139*0b57cec5SDimitry Andric	block(10)
140*0b57cec5SDimitry Andric	block(9)
141*0b57cec5SDimitry Andric	block(8)
142*0b57cec5SDimitry Andric	block(7)
143*0b57cec5SDimitry Andric	block(6)
144*0b57cec5SDimitry Andric	block(5)
145*0b57cec5SDimitry Andric	block(4)
146*0b57cec5SDimitry Andric	block(3)
147*0b57cec5SDimitry Andric	block(2)
148*0b57cec5SDimitry Andric	block(1)
149*0b57cec5SDimitry AndricLOCAL_LABEL(div0block):
150*0b57cec5SDimitry Andric	block(0)
151*0b57cec5SDimitry Andric
	// After block(0), r0 holds the remainder and r3 the quotient.
152*0b57cec5SDimitry Andric	str	r0, [r2]
153*0b57cec5SDimitry Andric	mov	r0, r3
154*0b57cec5SDimitry Andric	JMP(lr)
155*0b57cec5SDimitry Andric
	// Dividend smaller than divisor: the remainder is the dividend itself and
	// the quotient is 0.
156*0b57cec5SDimitry AndricLOCAL_LABEL(quotient0):
157*0b57cec5SDimitry Andric	str	r0, [r2]
158*0b57cec5SDimitry Andric	mov	r0, #0
159*0b57cec5SDimitry Andric	JMP(lr)
160*0b57cec5SDimitry Andric
	// Divisor is 1: the quotient is the dividend, which is already in r0, so
	// only the zero remainder has to be stored.
161*0b57cec5SDimitry AndricLOCAL_LABEL(divby1):
162*0b57cec5SDimitry Andric	mov	r3, #0
163*0b57cec5SDimitry Andric	str	r3, [r2]
164*0b57cec5SDimitry Andric	JMP(lr)
165*0b57cec5SDimitry Andric#endif // __ARM_ARCH_EXT_IDIV__
166*0b57cec5SDimitry Andric
	// Divisor is 0 (shared by the hardware and software paths). r0 is set to 0
	// and nothing is written through r2. On EABI targets control is handed to
	// __aeabi_idiv0 with a tail branch, so that routine returns to our caller;
	// otherwise the function returns 0 directly.
167*0b57cec5SDimitry AndricLOCAL_LABEL(divby0):
168*0b57cec5SDimitry Andric	mov	r0, #0
169*0b57cec5SDimitry Andric#ifdef __ARM_EABI__
170*0b57cec5SDimitry Andric	b	__aeabi_idiv0
171*0b57cec5SDimitry Andric#else
172*0b57cec5SDimitry Andric	JMP(lr)
173*0b57cec5SDimitry Andric#endif
174*0b57cec5SDimitry Andric
175*0b57cec5SDimitry AndricEND_COMPILERRT_FUNCTION(__udivmodsi4)
176*0b57cec5SDimitry Andric
177*0b57cec5SDimitry AndricNO_EXEC_STACK_DIRECTIVE
178*0b57cec5SDimitry Andric
179