1/* 2 * This m4 code has been taken from The SPARC Architecture Manual Version 8. 3 */ 4/* 5 * Division/Remainder 6 * 7 * Input is: 8 * dividend -- the thing being divided 9 * divisor -- how many ways to divide it 10 * Important parameters: 11 * N -- how many bits per iteration we try to get 12 * as our current guess: 13 * WORDSIZE -- how many bits altogether we're talking about: 14 * obviously: 15 * A derived constant: 16 * TOPBITS -- how many bits are in the top "decade" of a number: 17 * 18 * Important variables are: 19 * Q -- the partial quotient under development -- initially 0 20 * R -- the remainder so far -- initially == the dividend 21 * ITER -- number of iterations of the main division loop which will 22 * be required. Equal to CEIL( lg2(quotient)/4 ) 23 * Note that this is log_base_(2ˆ4) of the quotient. 24 * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) 25 * Cost: 26 * current estimate for non-large dividend is 27 * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C 28 * a large dividend is one greater than 2ˆ(31-4 ) and takes a 29 * different path, as the upper bits of the quotient must be developed 30 * one bit at a time. 31 * This uses the m4 and cpp macro preprocessors. 32 */ 33/* 34 * This is the recursive definition of how we develop quotient digits. 35 * It takes three important parameters: 36 * $1 -- the current depth, 1<=$1<=4 37 * $2 -- the current accumulation of quotient bits 38 * 4 -- max depth 39 * We add a new bit to $2 and either recurse or insert the bits in the quotient. 40 * Dynamic input: 41 * %o3 -- current remainder 42 * %o2 -- current quotient 43 * %o5 -- current comparand 44 * cc -- set on current value of %o3 45 * Dynamic output: 46 * %o3', %o2', %o5', cc' 47 */ 48#include "../assembly.h" 49.text 50 .align 32 51DEFINE_COMPILERRT_FUNCTION(__umodsi3) 52 b divide 53 mov 0,%g3 ! result always nonnegative 54.text 55 .align 32 56DEFINE_COMPILERRT_FUNCTION(__modsi3) 57 orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative 58 bge divide ! if not, skip this junk 59 mov %o0,%g3 ! record sign of result in sign of %g3 60 tst %o1 61 bge 2f 62 tst %o0 63 ! %o1 < 0 64 bge divide 65 neg %o1 66 2: 67 ! %o0 < 0 68 neg %o0 69 ! FALL THROUGH 70divide: 71 ! Compute size of quotient, scale comparand. 72 orcc %o1,%g0,%o5 ! movcc %o1,%o5 73 te 2 ! if %o1 = 0 74 mov %o0,%o3 75 mov 0,%o2 76 sethi %hi(1<<(32-4 -1)),%g1 77 cmp %o3,%g1 78 blu not_really_big 79 mov 0,%o4 80 ! 81 ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, 82 ! as our usual 4-at-a-shot divide step will cause overflow and havoc. 83 ! The total number of bits in the result here is 4*%o4+%g2, where 84 ! %g2 <= 4. 85 ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into 86! the top decade: so don't even bother to compare to %o3. 871: 88 cmp %o5,%g1 89 bgeu 3f 90 mov 1,%g2 91 sll %o5,4,%o5 92 b 1b 93 inc %o4 94! Now compute %g2 952: addcc %o5,%o5,%o5 96 bcc not_too_big 97 add %g2,1,%g2 98 ! We're here if the %o1 overflowed when Shifting. 99 ! This means that %o3 has the high-order bit set. 100 ! Restore %o5 and subtract from %o3. 101 sll %g1,4 ,%g1 ! high order bit 102 srl %o5,1,%o5 ! rest of %o5 103 add %o5,%g1,%o5 104 b do_single_div 105 dec %g2 106not_too_big: 1073: cmp %o5,%o3 108 blu 2b 109 nop 110 be do_single_div 111 nop 112! %o5 > %o3: went too far: back up 1 step 113! srl %o5,1,%o5 114! dec %g2 115! do single-bit divide steps 116! 117! We have to be careful here. We know that %o3 >= %o5, so we can do the 118! first divide step without thinking. BUT, the others are conditional, 119! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- 120! order bit set in the first step, just falling into the regular 121! division loop will mess up the first time around. 122! So we unroll slightly... 123do_single_div: 124 deccc %g2 125 bl end_regular_divide 126 nop 127 sub %o3,%o5,%o3 128 mov 1,%o2 129 b,a end_single_divloop 130 ! EMPTY 131single_divloop: 132 sll %o2,1,%o2 133 bl 1f 134 srl %o5,1,%o5 135 ! %o3 >= 0 136 sub %o3,%o5,%o3 137 b 2f 138 inc %o2 139 1: ! %o3 < 0 140 add %o3,%o5,%o3 141 dec %o2 142 2: 143 end_single_divloop: 144 deccc %g2 145 bge single_divloop 146 tst %o3 147 b,a end_regular_divide 148 ! EMPTY 149not_really_big: 1501: 151 sll %o5,4,%o5 152 cmp %o5,%o3 153 bleu 1b 154 inccc %o4 155 be got_result 156 dec %o4 157do_regular_divide: 158 ! Do the main division iteration 159 tst %o3 160 ! Fall through into divide loop 161divloop: 162 sll %o2,4,%o2 163 !depth 1, accumulated bits 0 164 bl L.1.16 165 srl %o5,1,%o5 166 ! remainder is nonnegative 167 subcc %o3,%o5,%o3 168 !depth 2, accumulated bits 1 169 bl L.2.17 170 srl %o5,1,%o5 171 ! remainder is nonnegative 172 subcc %o3,%o5,%o3 173 !depth 3, accumulated bits 3 174 bl L.3.19 175 srl %o5,1,%o5 176 ! remainder is nonnegative 177 subcc %o3,%o5,%o3 178 !depth 4, accumulated bits 7 179 bl L.4.23 180 srl %o5,1,%o5 181 ! remainder is nonnegative 182 subcc %o3,%o5,%o3 183 b 9f 184 add %o2, (7*2+1), %o2 185L.4.23: 186 ! remainder is negative 187 addcc %o3,%o5,%o3 188 b 9f 189 add %o2, (7*2-1), %o2 190L.3.19: 191 ! remainder is negative 192 addcc %o3,%o5,%o3 193 !depth 4, accumulated bits 5 194 bl L.4.21 195 srl %o5,1,%o5 196 ! remainder is nonnegative 197 subcc %o3,%o5,%o3 198 b 9f 199 add %o2, (5*2+1), %o2 200L.4.21: 201 ! remainder is negative 202 addcc %o3,%o5,%o3 203 b 9f 204 add %o2, (5*2-1), %o2 205L.2.17: 206 ! remainder is negative 207 addcc %o3,%o5,%o3 208 !depth 3, accumulated bits 1 209 bl L.3.17 210 srl %o5,1,%o5 211 ! remainder is nonnegative 212 subcc %o3,%o5,%o3 213 !depth 4, accumulated bits 3 214 bl L.4.19 215 srl %o5,1,%o5 216 ! remainder is nonnegative 217 subcc %o3,%o5,%o3 218 b 9f 219 add %o2, (3*2+1), %o2 220L.4.19: 221 ! remainder is negative 222 addcc %o3,%o5,%o3 223 b 9f 224 add %o2, (3*2-1), %o2 225L.3.17: 226 ! remainder is negative 227 addcc %o3,%o5,%o3 228 !depth 4, accumulated bits 1 229 bl L.4.17 230 srl %o5,1,%o5 231 ! remainder is nonnegative 232 subcc %o3,%o5,%o3 233 b 9f 234 add %o2, (1*2+1), %o2 235L.4.17: 236 ! remainder is negative 237 addcc %o3,%o5,%o3 238 b 9f 239 add %o2, (1*2-1), %o2 240L.1.16: 241 ! remainder is negative 242 addcc %o3,%o5,%o3 243 !depth 2, accumulated bits -1 244 bl L.2.15 245 srl %o5,1,%o5 246 ! remainder is nonnegative 247 subcc %o3,%o5,%o3 248 !depth 3, accumulated bits -1 249 bl L.3.15 250 srl %o5,1,%o5 251 ! remainder is nonnegative 252 subcc %o3,%o5,%o3 253 !depth 4, accumulated bits -1 254 bl L.4.15 255 srl %o5,1,%o5 256 ! remainder is nonnegative 257 subcc %o3,%o5,%o3 258 b 9f 259 add %o2, (-1*2+1), %o2 260L.4.15: 261 ! remainder is negative 262 addcc %o3,%o5,%o3 263 b 9f 264 add %o2, (-1*2-1), %o2 265L.3.15: 266 ! remainder is negative 267 addcc %o3,%o5,%o3 268 !depth 4, accumulated bits -3 269 bl L.4.13 270 srl %o5,1,%o5 271 ! remainder is nonnegative 272 subcc %o3,%o5,%o3 273 b 9f 274 add %o2, (-3*2+1), %o2 275L.4.13: 276 ! remainder is negative 277 addcc %o3,%o5,%o3 278 b 9f 279 add %o2, (-3*2-1), %o2 280L.2.15: 281 ! remainder is negative 282 addcc %o3,%o5,%o3 283 !depth 3, accumulated bits -3 284 bl L.3.13 285 srl %o5,1,%o5 286 ! remainder is nonnegative 287 subcc %o3,%o5,%o3 288 !depth 4, accumulated bits -5 289 bl L.4.11 290 srl %o5,1,%o5 291 ! remainder is nonnegative 292 subcc %o3,%o5,%o3 293 b 9f 294 add %o2, (-5*2+1), %o2 295L.4.11: 296 ! remainder is negative 297 addcc %o3,%o5,%o3 298 b 9f 299 add %o2, (-5*2-1), %o2 300L.3.13: 301 ! remainder is negative 302 addcc %o3,%o5,%o3 303 !depth 4, accumulated bits -7 304 bl L.4.9 305 srl %o5,1,%o5 306 ! remainder is nonnegative 307 subcc %o3,%o5,%o3 308 b 9f 309 add %o2, (-7*2+1), %o2 310L.4.9: 311 ! remainder is negative 312 addcc %o3,%o5,%o3 313 b 9f 314 add %o2, (-7*2-1), %o2 315 9: 316end_regular_divide: 317 deccc %o4 318 bge divloop 319 tst %o3 320 bl,a got_result 321 ! non-restoring fixup if remainder < 0, otherwise annulled 322 add %o3,%o1,%o3 323got_result: 324 tst %g3 325 bl,a 1f 326 ! negate for answer < 0, otherwise annulled 327 neg %o3,%o3 ! %o3 <- -%o3 3281: 329 retl ! leaf-routine return 330 mov %o3,%o0 ! remainder <- %o3 331