xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/arm/udivsi3.S (revision d13def78ccef6dbc25c2e197089ee5fc4d7b82c3)
1//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the __udivsi3 (32-bit unsigned integer divide)
10// function for the ARM 32-bit architecture.
11//
12//===----------------------------------------------------------------------===//
13
14#include "../assembly.h"
15
16	.syntax unified  // unified ARM/Thumb assembler syntax for the rest of the file
17	.text  // everything below is emitted into the code section
18
19DEFINE_CODE_STATE  // macro from ../assembly.h; presumably selects ARM vs. Thumb encoding -- confirm there
20
21	.p2align 2  // align the entry point to 2^2 = 4 bytes
22DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)  // presumably exposes __aeabi_uidiv as an alias of __udivsi3 -- confirm in ../assembly.h
23
24@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
25@   Calculate and return the quotient of the (unsigned) division.
26
27DEFINE_COMPILERRT_FUNCTION(__udivsi3)
	// In:  r0 = dividend, r1 = divisor (both treated as unsigned 32-bit).
	// Out: r0 = quotient. For a zero divisor r0 is set to 0 and, when
	//      __ARM_EABI__ is defined, __aeabi_idiv0 (defined elsewhere) is invoked.
	// Two implementations are selected at preprocessing time: a single
	// hardware udiv, or an unrolled software long division that uses
	// r2, r3 and ip as scratch registers.
28#if __ARM_ARCH_EXT_IDIV__
29	tst     r1, r1  // Z is set iff the divisor is zero
30	beq     LOCAL_LABEL(divby0)
31	udiv	r0, r0, r1  // r0 = r0 / r1 using the hardware divider
32	bx  	lr
33
34LOCAL_LABEL(divby0):
35	mov     r0, #0  // quotient reported for division by zero
36#  ifdef __ARM_EABI__
37	b       __aeabi_idiv0  // plain branch: lr still holds the return address of our caller
38#  else
39	JMP(lr)
40#  endif
41
42#else // ! __ARM_ARCH_EXT_IDIV__
43	cmp	r1, #1
44	bcc	LOCAL_LABEL(divby0)  // unsigned r1 < 1, i.e. the divisor is zero
	// Flags still come from "cmp r1, #1": eq means divisor == 1, in which
	// case r0 already holds the quotient and we return immediately.
	// NOTE: despite its name, num_neq_denom is reached when divisor != 1.
45#if defined(USE_THUMB_1)
46	bne LOCAL_LABEL(num_neq_denom)
47	JMP(lr)
48LOCAL_LABEL(num_neq_denom):
49#else
50	IT(eq)
51	JMPc(lr, eq)
52#endif
53	cmp	r0, r1  // dividend < divisor => quotient is 0
54#if defined(USE_THUMB_1)
55	bhs LOCAL_LABEL(num_ge_denom)
56	movs r0, #0
57	JMP(lr)
58LOCAL_LABEL(num_ge_denom):
59#else
60	ITT(cc)
61	movcc	r0, #0
62	JMPc(lr, cc)
63#endif
64
65	// Implement division using binary long division algorithm.
66	//
67	// r0 is the numerator, r1 the denominator.
68	//
69	// The code before the computed jump works out a start shift I. On the
70	// CLZ path r0 and (r1 << I) have the highest bit set in the same position.
71	// At the time of the jump the target is div0block - (block size) * I,
72	// so exactly block(I) ... block(0) are executed.
73	// This depends on the fixed instruction size of block: 12 bytes in ARM
	// mode, 14 bytes in Thumb-2 mode and 10 bytes in Thumb-1 mode. In the
	// Thumb cases bit 0 of the target is also set (the "+ 1" / "adds r0, #1").
74	//
75	// block(shift) implements the test-and-update-quotient core.
76	// It assumes (r1 << shift) can be computed without overflow and
77	// that r0 < 2 * (r1 << shift). The quotient is stored in r3.
78
79#  if defined(__ARM_FEATURE_CLZ)
80	clz	ip, r0
81	clz	r3, r1
82	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
83	sub	r3, r3, ip  // r3 = I = clz(r1) - clz(r0)
84#    if defined(USE_THUMB_2)
85	adr	ip, LOCAL_LABEL(div0block) + 1
86	sub	ip, ip, r3, lsl #1  // Thumb-2 only: extra 2 * I, giving 14 * I in total
87#    else
88	adr	ip, LOCAL_LABEL(div0block)
89#    endif
90	sub	ip, ip, r3, lsl #2  // - 4 * I
91	sub	ip, ip, r3, lsl #3  // - 8 * I  (4 + 8 = 12 bytes per block)
92	mov	r3, #0  // clear the quotient accumulator
93	bx	ip  // enter the unrolled table at block(I)
94#  else // No CLZ Feature
95#    if defined(USE_THUMB_2)
96#    error THUMB mode requires CLZ or UDIV
97#    endif
98#    if defined(USE_THUMB_1)
99#      define BLOCK_SIZE 10
100#    else
101#      define BLOCK_SIZE 12
102#    endif
103
	// Without CLZ the start shift is found by a binary search over
	// 16, 8, 4, 2, 1: whenever (r2 >> k) >= r1, r2 is replaced by (r2 >> k)
	// and the entry address is moved back by k blocks.
	// ARM mode keeps the entry address in ip. Thumb-1 builds it in r0 and
	// parks the numerator in ip until just before the jump.
104	mov	r2, r0  // r2 = working copy of the numerator for the search
105#    if defined(USE_THUMB_1)
106	mov ip, r0  // save the numerator; r0 is reused for the entry address
107	adr r0, LOCAL_LABEL(div0block)
108	adds r0, #1  // set bit 0 of the jump target (Thumb state)
109#    else
110	adr	ip, LOCAL_LABEL(div0block)
111#    endif
112	lsrs	r3, r2, #16
113	cmp	r3, r1
114#    if defined(USE_THUMB_1)
115	blo LOCAL_LABEL(skip_16)
116	movs r2, r3
117	subs r0, r0, #(16 * BLOCK_SIZE)
118LOCAL_LABEL(skip_16):
119#    else
120	movhs	r2, r3
121	subhs	ip, ip, #(16 * BLOCK_SIZE)
122#    endif
123
124	lsrs	r3, r2, #8
125	cmp	r3, r1
126#    if defined(USE_THUMB_1)
127	blo LOCAL_LABEL(skip_8)
128	movs r2, r3
129	subs r0, r0, #(8 * BLOCK_SIZE)
130LOCAL_LABEL(skip_8):
131#    else
132	movhs	r2, r3
133	subhs	ip, ip, #(8 * BLOCK_SIZE)
134#    endif
135
136	lsrs	r3, r2, #4
137	cmp	r3, r1
138#    if defined(USE_THUMB_1)
139	blo LOCAL_LABEL(skip_4)
140	movs r2, r3
141	subs r0, r0, #(4 * BLOCK_SIZE)
142LOCAL_LABEL(skip_4):
143#    else
144	movhs	r2, r3
145	subhs	ip, #(4 * BLOCK_SIZE)  // two-operand form; same effect as "subhs ip, ip, #..." used in the sibling steps
146#    endif
147
148	lsrs	r3, r2, #2
149	cmp	r3, r1
150#    if defined(USE_THUMB_1)
151	blo LOCAL_LABEL(skip_2)
152	movs r2, r3
153	subs r0, r0, #(2 * BLOCK_SIZE)
154LOCAL_LABEL(skip_2):
155#    else
156	movhs	r2, r3
157	subhs	ip, ip, #(2 * BLOCK_SIZE)
158#    endif
159
160	// Last block, no need to update r2 or r3.
161#    if defined(USE_THUMB_1)
162	lsrs r3, r2, #1
163	cmp r3, r1
164	blo LOCAL_LABEL(skip_1)
165	subs r0, r0, #(1 * BLOCK_SIZE)
166LOCAL_LABEL(skip_1):
167	movs r2, r0  // r2 = entry address into the block table
168	mov r0, ip  // restore the numerator saved in ip
169	movs r3, #0  // clear the quotient accumulator
170	JMP (r2)
171
172#    else
173	cmp	r1, r2, lsr #1  // ls <=> r1 <= (r2 >> 1): one more block is needed
174	subls	ip, ip, #(1 * BLOCK_SIZE)
175
176	movs	r3, #0  // clear the quotient accumulator
177
178	JMP(ip)
179#    endif
180#  endif // __ARM_FEATURE_CLZ
181
182
	// IMM expands to a literal '#'. The block() macros below need it for
	// immediates because a bare '#' inside a function-like macro body is
	// the preprocessor stringification operator.
183#define	IMM	#
184	// due to the range limit of branch in Thumb1, we have to place the
185	// block closer
186LOCAL_LABEL(divby0):
187	movs	r0, #0  // quotient reported for division by zero
188#      if defined(__ARM_EABI__)
	// NOTE(review): r7 is presumably pushed alongside lr only to keep the
	// stack 8-byte aligned across the call -- confirm.
189	push {r7, lr}
190	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
191	pop  {r7, pc}  // return to our caller with whatever __aeabi_idiv0 left in r0
192#      else
193	JMP(lr)
194#      endif
195
196
197#if defined(USE_THUMB_1)
198#define block(shift)                                                           \
199	lsls r2, r1, IMM shift;                                                      \
200	cmp r0, r2;                                                                  \
201	blo LOCAL_LABEL(block_skip_##shift);                                         \
202	subs r0, r0, r2;                                                             \
203	LOCAL_LABEL(block_skip_##shift) :;                                           \
204	adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.
205
206	// TODO: if the current location counter is not word aligned, we don't
207	// need the .p2align and nop
208	// Label div0block must be word-aligned (it is the operand of a Thumb-1 adr). First align block 31
209	.p2align 2
210	nop // 2-byte pad: 2 + 31 blocks * 10 bytes = 312 bytes, a multiple of 4, so div0block is word-aligned
211
212#else
213#define block(shift)                                                           \
214	cmp	r0, r1, lsl IMM shift;                                         \
215	ITT(hs);                                                               \
216	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
217	WIDE(subhs)	r0, r0, r1, lsl IMM shift
218#endif
219
	// Unrolled long division: one block per quotient bit, highest shift
	// first. Execution enters at the block chosen above and falls through
	// to block(0).
220	block(31)
221	block(30)
222	block(29)
223	block(28)
224	block(27)
225	block(26)
226	block(25)
227	block(24)
228	block(23)
229	block(22)
230	block(21)
231	block(20)
232	block(19)
233	block(18)
234	block(17)
235	block(16)
236	block(15)
237	block(14)
238	block(13)
239	block(12)
240	block(11)
241	block(10)
242	block(9)
243	block(8)
244	block(7)
245	block(6)
246	block(5)
247	block(4)
248	block(3)
249	block(2)
250	block(1)
251LOCAL_LABEL(div0block):
252	block(0)
253
254	mov	r0, r3  // return the accumulated quotient
255	JMP(lr)
256#endif // __ARM_ARCH_EXT_IDIV__
257
258END_COMPILERRT_FUNCTION(__udivsi3)
259
260NO_EXEC_STACK_DIRECTIVE  // macro from ../assembly.h; presumably marks the object as not needing an executable stack -- confirm there
261
262