//===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __umodsi3 (32-bit unsigned integer modulus)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text
	DEFINE_CODE_STATE

@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the remainder of the (unsigned) division.
@
@   In:       r0 = dividend, r1 = divisor.
@   Out:      r0 = dividend % divisor.
@   Scratch:  r2, r3 and ip are overwritten (which ones depends on the path).
@   Divisor 0: r0 is set to 0; on EABI targets control is then transferred to
@              __aeabi_idiv0, otherwise the function returns with r0 = 0.

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide available: remainder = r0 - (r0 / r1) * r1.
	tst	r1, r1				// Z is set when the divisor is 0.
	beq	LOCAL_LABEL(divby0)
	udiv	r2, r0, r1			// r2 = r0 / r1
	mls	r0, r2, r1, r0			// r0 = r0 - r2 * r1
	bx	lr
#else
	// Handle the trivial cases first so the long division below only
	// runs with r1 >= 2 and r0 >= r1.
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)		// r1 < 1 (unsigned), i.e. r1 == 0.
	ITT(eq)
	moveq	r0, #0				// r1 == 1: the remainder is always 0.
	JMPc(lr, eq)
	cmp	r0, r1
	IT(cc)
	JMPc(lr, cc)				// r0 < r1: r0 already is the remainder.

	// Implement division using binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator.
	//
	// The code before the jump computes a shift I such that (r1 << I)
	// does not overflow and r0 < 2 * (r1 << I). On the CLZ path, I is
	// chosen so that r0 and (r1 << I) have the highest bit set in the
	// same position.
	// At the time of the jump, ip := .Ldiv0block - BLOCK_SIZE * I, so
	// execution enters the unrolled sequence at block(I) and falls
	// through to block(0).
	// This depends on the fixed instruction size of block.
	// For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes.
	//
	// block(shift) implements the test-and-subtract core: when
	// r0 >= (r1 << shift), it subtracts (r1 << shift) from r0.
	// It assumes (r1 << shift) can be computed without overflow and
	// that r0 < 2 * (r1 << shift). No quotient is tracked here; the
	// running remainder stays in r0.

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip			// r3 = I
#    if defined(USE_THUMB_2)
	// Bit 0 of the target is set so that bx stays in Thumb state.
	// I * 2 here plus I * 8 below gives the 10-byte Thumb block size.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	sub	ip, ip, r3, lsl #3
	bx	ip
#  else
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
	// No CLZ: binary search for the largest I with (r0 >> I) >= r1.
	// r2 holds r0 shifted right by the amount accepted so far, and ip
	// is moved back by 8 bytes (one ARM block) per accepted bit.
	mov	r2, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r2, #16
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(16 * 8)

	lsr	r3, r2, #8
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(8 * 8)

	lsr	r3, r2, #4
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(4 * 8)

	lsr	r3, r2, #2
	cmp	r3, r1
	movhs	r2, r3
	subhs	ip, ip, #(2 * 8)

	// Last block, no need to update r2 or r3.
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * 8)

	JMP(ip)
#  endif

#define IMM #

#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	IT(hs);                                                                \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

LOCAL_LABEL(divby0):
	// Division by zero: the result register is forced to 0.
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__umodsi3)

NO_EXEC_STACK_DIRECTIVE
