//===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __umodsi3 (32-bit unsigned integer modulus)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text
	DEFINE_CODE_STATE

@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.
@
@   In:      r0 = dividend, r1 = divisor
@   Out:     r0 = dividend % divisor
@            If divisor is 0, r0 is set to 0 and, under __ARM_EABI__, control
@            is transferred to __aeabi_idiv0; otherwise 0 is returned.
@   Scratch: r2, r3, ip and the condition flags (no stack is used).

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide available: remainder = r0 - (r0 / r1) * r1.
	tst     r1, r1
	beq     LOCAL_LABEL(divby0)
	udiv	r2, r0, r1
	mls 	r0, r2, r1, r0
	bx  	lr
#else
	// Trivial cases first:
	//   r1 == 0  -> divide-by-zero handler
	//   r1 == 1  -> remainder is 0
	//   r0 <  r1 -> remainder is r0 itself, already in place
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	ITT(eq)
	moveq	r0, #0
	JMPc(lr, eq)
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)

	// From here on r0 >= r1 >= 2.
	//
	// Implement the modulus using the binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator. The remainder is reduced
	// in place in r0; no quotient is accumulated.
	//
	// The code before the computed jump determines the starting shift I.
	// With CLZ, I = clz(r1) - clz(r0), so that r0 and (r1 << I) have the
	// highest bit set in the same position. Without CLZ, a binary search
	// finds the largest I for which (r0 >> I) >= r1.
	//
	// At the time of the jump, ip := .Ldiv0block - BLOCK_SIZE * I, where
	// BLOCK_SIZE is the fixed encoded size of one block(): 8 bytes in ARM
	// mode, 10 bytes in THUMB mode. The unrolled sequence below must
	// therefore keep every block the same size.
	//
	// block(shift) implements the test-and-subtract core. It assumes that
	// (r1 << shift) can be computed without overflow and that
	// r0 < 2 * (r1 << shift). On exit r0 < (r1 << shift).

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip
	// r3 = I. Step back I blocks from div0block.
#    if defined(USE_THUMB_2)
	// "+ 1" sets bit 0 of the target so that bx stays in Thumb state.
	// I * 2 here plus I * 8 below gives the 10-byte Thumb block stride.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// No CLZ: binary-search the shift. r2 holds the numerator shifted
	// right by the amount accepted so far; each accepted step of k bits
	// moves ip back by k blocks of 8 bytes.
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	// Last block, no need to update r2 or r3.
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

// IMM expands to the immediate marker. It cannot be written literally inside
// block() because '#' in a function-like macro body is the preprocessor
// stringification operator.
#define	IMM	#

// block(shift): if r0 >= (r1 << shift) then r0 -= (r1 << shift).
// WIDE() comes from assembly.h; presumably it selects the 32-bit encoding
// in Thumb-2 so that every block has the same size (confirm in assembly.h).
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	// Entry point is computed above; execution falls through every
	// remaining block down to block(0).
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	// r0 now holds the remainder (r0 < r1).
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

	// Division by zero: result register is forced to 0. Under the ARM EABI
	// the handler __aeabi_idiv0 is tail-called (lr is left untouched, so it
	// returns to our caller); otherwise 0 is returned directly.
LOCAL_LABEL(divby0):
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)

NO_EXEC_STACK_DIRECTIVE
