//===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __udivmodsi4 (32-bit unsigned integer divide and
// modulus) function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text
	DEFINE_CODE_STATE

@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor,
@                           unsigned int *remainder)
@   Calculate the quotient and remainder of the (unsigned) division.  The return
@   value is the quotient, the remainder is placed in the variable.
@
@   Register roles as used below:
@     r0  dividend on entry, quotient on return
@     r1  divisor
@     r2  address that receives the remainder
@     r3, ip  scratch
@   When the divisor is zero, r0 is set to zero and nothing is stored
@   through r2.

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide is available: one udiv for the quotient and one
	// multiply-subtract for the remainder.
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	mov	r3, r0			// keep the dividend, r0 becomes the quotient
	udiv	r0, r3, r1
	mls	r1, r0, r1, r3		// r1 = dividend - quotient * divisor
	str	r1, [r2]
	bx	lr
#else
	// Dispose of the trivial cases first:
	//   divisor == 0        -> divby0
	//   divisor == 1        -> divby1    (quotient is the dividend itself)
	//   dividend < divisor  -> quotient0 (remainder is the dividend itself)
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	beq	LOCAL_LABEL(divby1)
	cmp	r0, r1
	bcc	LOCAL_LABEL(quotient0)

	// Implement division using binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator.
	//
	// The code before the jump computes the correct shift I, so that
	// r0 and (r1 << I) have the highest bit set in the same position.
	// At the time of the jump, ip := div0block - BLOCK_SIZE * I, so that
	// execution enters the unrolled sequence at block(I) and falls through
	// block(I - 1) ... block(0).
	// This depends on the fixed instruction size of block: BLOCK_SIZE is
	// 12 bytes in ARM mode and 14 bytes in Thumb-2 mode.
	//
	// block(shift) implements the test-and-update-quotient core.
	// It assumes (r1 << shift) can be computed without overflow and
	// that r0 < 2 * (r1 << shift), so a single conditional subtraction
	// yields the quotient bit. The quotient is accumulated in r3 and r0 is
	// reduced to the running remainder.

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip		// r3 = I
#    if defined(USE_THUMB_2)
	// The + 1 sets bit 0 of the target so that bx remains in Thumb state.
	// The extra 2 * I below brings the total to 14 * I.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #2	// ip -= 4 * I
	sub	ip, ip, r3, lsl #3	// ip -= 8 * I
	mov	r3, #0			// clear the quotient accumulator
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// No CLZ: find I by binary search. r4 holds a working copy of the
	// dividend. For k = 16, 8, 4, 2: when (r4 >> k) >= r1 the divisor fits
	// at least k bits further left, so keep the shifted value in r4 and
	// move ip back by k blocks.
	// NOTE(review): sp is moved by 8 rather than 4, presumably to keep the
	// stack 8-byte aligned - confirm against the ABI in use.
	str	r4, [sp, #-8]!

	mov	r4, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r4, #16
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r4, #8
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r4, #4
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r4, #2
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(2 * 12)

	// Last block, no need to update r3 or r4.
	cmp	r1, r4, lsr #1
	subls	ip, ip, #(1 * 12)

	ldr	r4, [sp], #8	// restore r4, we are done with it.
	mov	r3, #0		// clear the quotient accumulator

	JMP(ip)
#  endif

// IMM expands to the immediate marker; writing it directly inside block()
// would be taken by the preprocessor as the stringizing operator.
#define	IMM	#

// One long-division step for quotient bit `shift`:
//   if (r0 >= (r1 << shift)) { r3 += 1 << shift; r0 -= r1 << shift; }
// NOTE(review): ITT and WIDE come from assembly.h; they presumably emit the
// IT block and force 32-bit encodings in Thumb-2 so that every block has the
// same size - confirm there before changing this macro.
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	// r0 now holds the remainder and r3 the quotient.
	str	r0, [r2]
	mov	r0, r3
	JMP(lr)

LOCAL_LABEL(quotient0):
	// dividend < divisor: the quotient is 0 and the remainder is the dividend.
	str	r0, [r2]
	mov	r0, #0
	JMP(lr)

LOCAL_LABEL(divby1):
	// divisor == 1: r0 already holds the quotient, the remainder is 0.
	mov	r3, #0
	str	r3, [r2]
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

LOCAL_LABEL(divby0):
	// Division by zero: the result register is zeroed and nothing is
	// written through r2. On EABI targets control is handed to
	// __aeabi_idiv0 with a plain branch, so that routine returns directly
	// to our caller.
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)

NO_EXEC_STACK_DIRECTIVE