//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __udivsi3 (32-bit unsigned integer divide)
// function for the ARM 32-bit architecture.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"

	.syntax unified
	.text

DEFINE_CODE_STATE

	.p2align 2
DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)

@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
@   Calculate and return the quotient of the (unsigned) division.
//
// In:       r0 = dividend (numerator), r1 = divisor (denominator)
// Out:      r0 = quotient
// Scratch:  r2, r3, ip and the condition flags are overwritten by the
//           software paths below.
// Zero divisor: r0 is set to 0; when __ARM_EABI__ is defined control is then
//           passed to __aeabi_idiv0, otherwise the function returns 0.
//
// Three implementations are selected at preprocessing time:
//   1. __ARM_ARCH_EXT_IDIV__          : a single udiv instruction.
//   2. __ARM_FEATURE_CLZ              : clz computes the entry point into an
//                                       unrolled long-division sequence.
//   3. neither (ARM mode or Thumb-1)  : a binary search computes the same
//                                       entry point.

DEFINE_COMPILERRT_FUNCTION(__udivsi3)
#if __ARM_ARCH_EXT_IDIV__
	// Hardware divide is available; only the zero divisor needs care.
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	udiv	r0, r0, r1
	bx	lr

LOCAL_LABEL(divby0):
	// Use movs for compatibility with v8-m.base.
	movs	r0, #0
#  ifdef __ARM_EABI__
	b	__aeabi_idiv0
#  else
	JMP(lr)
#  endif

#else // ! __ARM_ARCH_EXT_IDIV__
	// Fast exits that need no division at all.
	//   r1 == 0 : unsigned (r1 < 1), handled at divby0.
	//   r1 == 1 : the quotient is r0 itself, return immediately.
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
#  if defined(USE_THUMB_1)
	bne	LOCAL_LABEL(num_neq_denom)
	JMP(lr)
LOCAL_LABEL(num_neq_denom):
#  else
	IT(eq)
	JMPc(lr, eq)
#  endif
	//   r0 < r1 : the quotient is 0.
	cmp	r0, r1
#  if defined(USE_THUMB_1)
	bhs	LOCAL_LABEL(num_ge_denom)
	movs	r0, #0
	JMP(lr)
LOCAL_LABEL(num_ge_denom):
#  else
	ITT(cc)
	movcc	r0, #0
	JMPc(lr, cc)
#  endif

	// Implement division using binary long division algorithm.
	//
	// r0 is the numerator, r1 the denominator.
	//
	// The code before JMP computes the correct shift I, so that
	// r0 and (r1 << I) have the highest bit set in the same position.
	// At the time of JMP, the jump target is .Ldiv0block - S * I, where S
	// is the size of one block, so execution enters the unrolled sequence
	// at block(I) and falls through down to block(0).
	// This depends on the fixed instruction size of block.
	// For ARM mode, this is 12 Bytes, for Thumb-2 mode 14 Bytes and for
	// Thumb-1 mode 10 Bytes (see BLOCK_SIZE below).
	//
	// block(shift) implements the test-and-update-quotient core.
	// It assumes (r1 << shift) can be computed without overflow and
	// that r0 < 2 * (r1 << shift). The quotient is stored in r3.

#  if defined(__ARM_FEATURE_CLZ)
	clz	ip, r0
	clz	r3, r1
	// r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3.
	sub	r3, r3, ip		// r3 = shift I
#    if defined(USE_THUMB_2)
	// The + 1 sets bit 0 of the target address for the bx below.
	// Subtracting r3 * 2 here plus r3 * 4 and r3 * 8 below gives r3 * 14.
	adr	ip, LOCAL_LABEL(div0block) + 1
	sub	ip, ip, r3, lsl #1
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	// r3 * 4 + r3 * 8 = r3 * 12.
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0			// quotient accumulator starts at 0
	bx	ip
#  else // No CLZ Feature
#    if defined(USE_THUMB_2)
#    error THUMB mode requires CLZ or UDIV
#    endif
#    if defined(USE_THUMB_1)
#      define BLOCK_SIZE 10
#    else
#      define BLOCK_SIZE 12
#    endif

	// Binary search for the shift: r2 holds the numerator shifted right by
	// the amount accepted so far, r3 is the trial value. Each accepted
	// step of N bits moves the entry point back by N blocks.
	//
	// In the Thumb-1 variant the entry address is accumulated in r0 and
	// the numerator is parked in ip until just before the jump.
	mov	r2, r0
#    if defined(USE_THUMB_1)
	mov	ip, r0
	adr	r0, LOCAL_LABEL(div0block)
	adds	r0, #1			// set bit 0 of the jump target
#    else
	adr	ip, LOCAL_LABEL(div0block)
#    endif
	lsrs	r3, r2, #16
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_16)
	movs	r2, r3
	subs	r0, r0, #(16 * BLOCK_SIZE)
LOCAL_LABEL(skip_16):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(16 * BLOCK_SIZE)
#    endif

	lsrs	r3, r2, #8
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_8)
	movs	r2, r3
	subs	r0, r0, #(8 * BLOCK_SIZE)
LOCAL_LABEL(skip_8):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(8 * BLOCK_SIZE)
#    endif

	lsrs	r3, r2, #4
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_4)
	movs	r2, r3
	subs	r0, r0, #(4 * BLOCK_SIZE)
LOCAL_LABEL(skip_4):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(4 * BLOCK_SIZE)
#    endif

	lsrs	r3, r2, #2
	cmp	r3, r1
#    if defined(USE_THUMB_1)
	blo	LOCAL_LABEL(skip_2)
	movs	r2, r3
	subs	r0, r0, #(2 * BLOCK_SIZE)
LOCAL_LABEL(skip_2):
#    else
	movhs	r2, r3
	subhs	ip, ip, #(2 * BLOCK_SIZE)
#    endif

	// Last block, no need to update r2 or r3.
#    if defined(USE_THUMB_1)
	lsrs	r3, r2, #1
	cmp	r3, r1
	blo	LOCAL_LABEL(skip_1)
	subs	r0, r0, #(1 * BLOCK_SIZE)
LOCAL_LABEL(skip_1):
	movs	r2, r0			// r2 = entry address
	mov	r0, ip			// restore the numerator into r0
	movs	r3, #0			// quotient accumulator starts at 0
	JMP(r2)

#    else
	cmp	r1, r2, lsr #1
	subls	ip, ip, #(1 * BLOCK_SIZE)

	movs	r3, #0			// quotient accumulator starts at 0

	JMP(ip)
#    endif
#  endif // __ARM_FEATURE_CLZ


	// IMM lets the block macros below emit an immediate-operand marker
	// from inside a preprocessor macro body.
#define	IMM	#
	// due to the range limit of branch in Thumb1, we have to place the
	// block closer
LOCAL_LABEL(divby0):
	movs	r0, #0
#  if defined(__ARM_EABI__)
	// NOTE(review): r7 is pushed together with lr, presumably to keep the
	// stack 8-byte aligned across the call - confirm against the AAPCS.
	push	{r7, lr}
	bl	__aeabi_idiv0 // due to relocation limit, can't use b.
	pop	{r7, pc}
#  else
	JMP(lr)
#  endif


#if defined(USE_THUMB_1)
	// Thumb-1 block: five 16-bit instructions (BLOCK_SIZE 10). The quotient
	// is built by shifting the carry of each comparison into r3.
#define block(shift)                                                           \
	lsls r2, r1, IMM shift;                                                      \
	cmp r0, r2;                                                                  \
	blo LOCAL_LABEL(block_skip_##shift);                                         \
	subs r0, r0, r2;                                                             \
	LOCAL_LABEL(block_skip_##shift) :;                                           \
	adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2.

	// TODO: if current location counter is not word aligned, we don't
	// need the .p2align and nop
	// Label div0block must be word-aligned. First align block 31
	.p2align 2
	nop // Padding to align div0block as 31 blocks = 310 bytes

#else
	// ARM / Thumb-2 block: compare against the shifted denominator and, if
	// the numerator is not smaller, set quotient bit `shift` in r3 and
	// subtract the shifted denominator from r0.
#define block(shift)                                                           \
	cmp	r0, r1, lsl IMM shift;                                         \
	ITT(hs);                                                               \
	WIDE(addhs)	r3, r3, IMM (1 << shift);                              \
	WIDE(subhs)	r0, r0, r1, lsl IMM shift
#endif

	// Unrolled long division. The computed jump above lands on one of
	// these blocks; nothing may be inserted between them, because the
	// entry address is derived from the fixed block size.
	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	// r3 holds the finished quotient; return it in r0.
	mov	r0, r3
	JMP(lr)
#endif // __ARM_ARCH_EXT_IDIV__

END_COMPILERRT_FUNCTION(__udivsi3)

NO_EXEC_STACK_DIRECTIVE
