1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23! .seg "data" 24! .asciz "Copyr 1986 Sun Micro" 25 .seg "text" 26 27#ident "%Z%%M% %I% %E% SMI" 28 29/* 30 * Copyright 1986 Sun Microsystems, Inc. All rights reserved. 31 * Use is subject to license terms. 32 */ 33 34/* 35 * divison/remainder 36 * 37 * Input is: 38 * dividend -- the thing being divided 39 * divisor -- how many ways to divide 40 * Important parameters: 41 * N -- how many bits per iteration we try to get 42 * as our current guess: 43 * WORDSIZE -- how many bits altogether we're talking about: 44 * obviously: 45 * A derived constant: 46 * TOPBITS -- how many bits are in the top "decade" of a number: 47 * 48 * Important variables are: 49 * Q -- the partial quotient under development -- initally 0 50 * R -- the remainder so far -- initially == the dividend 51 * ITER -- number of iterations of the main division loop will 52 * be required. Equal to CEIL( lg2(quotient)/4 ) 53 * Note that this is log_base_(2^4) of the quotient. 54 * V -- the current comparand -- initially divisor*2^(ITER*4-1) 55 * Cost: 56 * current estimate for non-large dividend is 57 * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C 58 * a large dividend is one greater than 2^(31-4 ) and takes a 59 * different path, as the upper bits of the quotient must be developed 60 * one bit at a time. 61 */ 62 63#include <sys/trap.h> 64#include <sys/asm_linkage.h> 65 66 67 68 69 70 71 72 73 ! working variable 74 75 76/* 77 * this is the recursive definition of how we develop quotient digits. 78 * it takes three important parameters: 79 * $1 -- the current depth, 1<=$1<=4 80 * $2 -- the current accumulation of quotient bits 81 * 4 -- max depth 82 * We add a new bit to $2 and either recurse or 83 * insert the bits in the quotient. 84 * Dynamic input: 85 * %o3 -- current remainder 86 * %o2 -- current quotient 87 * %o5 -- current comparand 88 * cc -- set on current value of %o3 89 * Dynamic output: 90 * %o3', %o2', %o5', cc' 91 */ 92 93 94 95 96! RTENTRY(.urem) ! UNSIGNED REMAINDER 97 .global .urem 98.urem: 99 b divide 100 mov 0,%g1 ! result always positive 101 102! RTENTRY(.rem) ! SIGNED REMAINDER 103 .global .rem 104.rem: 105 orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative 106 bge divide ! if not, skip this junk 107 mov %o0,%g1 ! record sign of result in sign of %g1 108 tst %o1 109 bge 2f 110 tst %o0 111 ! %o1 < 0 112 bge divide 113 neg %o1 114 2: 115 ! %o0 < 0 116 neg %o0 117 ! FALL THROUGH 118 119 120divide: 121! compute size of quotient, scale comparand 122 orcc %o1,%g0,%o5 ! movcc %o1,%o5 123 bnz 0f ! if %o1 != 0 124 mov %o0,%o3 125 ba zero_divide 126 nop 1270: 128 cmp %o3,%o5 129 blu got_result ! if %o3<%o5 already, there's no point in continuing 130 mov 0,%o2 131 sethi %hi(1<<(32-4 -1)),%g2 132 cmp %o3,%g2 133 blu not_really_big 134 mov 0,%o4 135 ! 136 ! here, the %o0 is >= 2^(31-4) or so. We must be careful here, as 137 ! our usual 4-at-a-shot divide step will cause overflow and havoc. The 138 ! total number of bits in the result here is 4*%o4+%g3, where %g3 <= 4. 139 ! compute %o4, in an unorthodox manner: know we need to Shift %o5 into 140 ! the top decade: so don't even bother to compare to %o3. 141 1: 142 cmp %o5,%g2 143 bgeu 3f 144 mov 1,%g3 145 sll %o5,4,%o5 146 b 1b 147 inc %o4 148 ! now compute %g3 149 2: addcc %o5,%o5,%o5 150 bcc not_too_big ! bcc not_too_big 151 add %g3,1,%g3 152 ! 153 ! here if the %o1 overflowed when Shifting 154 ! this means that %o3 has the high-order bit set 155 ! restore %o5 and subtract from %o3 156 sll %g2,4 ,%g2 ! high order bit 157 srl %o5,1,%o5 ! rest of %o5 158 add %o5,%g2,%o5 159 b do_single_div 160 sub %g3,1,%g3 161 not_too_big: 162 3: cmp %o5,%o3 163 blu 2b 164 nop 165 be do_single_div 166 nop 167 ! %o5 > %o3: went too far: back up 1 step 168 ! srl %o5,1,%o5 169 ! dec %g3 170 ! do single-bit divide steps 171 ! 172 ! we have to be careful here. We know that %o3 >= %o5, so we can do the 173 ! first divide step without thinking. BUT, the others are conditional, 174 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- 175 ! order bit set in the first step, just falling into the regular 176 ! division loop will mess up the first time around. 177 ! So we unroll slightly... 178 do_single_div: 179 deccc %g3 180 bl end_regular_divide 181 nop 182 sub %o3,%o5,%o3 183 mov 1,%o2 184 b,a end_single_divloop 185 single_divloop: 186 sll %o2,1,%o2 187 bl 1f 188 srl %o5,1,%o5 189 ! %o3 >= 0 190 sub %o3,%o5,%o3 191 b 2f 192 inc %o2 193 1: ! %o3 < 0 194 add %o3,%o5,%o3 195 dec %o2 196 2: 197 end_single_divloop: 198 deccc %g3 199 bge single_divloop 200 tst %o3 201 b,a end_regular_divide 202 203not_really_big: 2041: 205 sll %o5,4,%o5 206 cmp %o5,%o3 207 bleu 1b 208 inccc %o4 209 be got_result 210 dec %o4 211do_regular_divide: 212 213! do the main division iteration 214 tst %o3 215! fall through into divide loop 216divloop: 217 sll %o2,4,%o2 218 !depth 1, accumulated bits 0 219 bl L.1.16 220 srl %o5,1,%o5 221 ! remainder is positive 222 subcc %o3,%o5,%o3 223 !depth 2, accumulated bits 1 224 bl L.2.17 225 srl %o5,1,%o5 226 ! remainder is positive 227 subcc %o3,%o5,%o3 228 !depth 3, accumulated bits 3 229 bl L.3.19 230 srl %o5,1,%o5 231 ! remainder is positive 232 subcc %o3,%o5,%o3 233 !depth 4, accumulated bits 7 234 bl L.4.23 235 srl %o5,1,%o5 236 ! remainder is positive 237 subcc %o3,%o5,%o3 238 b 9f 239 add %o2, (7*2+1), %o2 240 241L.4.23: ! remainder is negative 242 addcc %o3,%o5,%o3 243 b 9f 244 add %o2, (7*2-1), %o2 245 246 247 248 249L.3.19: ! remainder is negative 250 addcc %o3,%o5,%o3 251 !depth 4, accumulated bits 5 252 bl L.4.21 253 srl %o5,1,%o5 254 ! remainder is positive 255 subcc %o3,%o5,%o3 256 b 9f 257 add %o2, (5*2+1), %o2 258 259L.4.21: ! remainder is negative 260 addcc %o3,%o5,%o3 261 b 9f 262 add %o2, (5*2-1), %o2 263 264 265 266 267 268 269 270L.2.17: ! remainder is negative 271 addcc %o3,%o5,%o3 272 !depth 3, accumulated bits 1 273 bl L.3.17 274 srl %o5,1,%o5 275 ! remainder is positive 276 subcc %o3,%o5,%o3 277 !depth 4, accumulated bits 3 278 bl L.4.19 279 srl %o5,1,%o5 280 ! remainder is positive 281 subcc %o3,%o5,%o3 282 b 9f 283 add %o2, (3*2+1), %o2 284 285L.4.19: ! remainder is negative 286 addcc %o3,%o5,%o3 287 b 9f 288 add %o2, (3*2-1), %o2 289 290 291 292 293L.3.17: ! remainder is negative 294 addcc %o3,%o5,%o3 295 !depth 4, accumulated bits 1 296 bl L.4.17 297 srl %o5,1,%o5 298 ! remainder is positive 299 subcc %o3,%o5,%o3 300 b 9f 301 add %o2, (1*2+1), %o2 302 303L.4.17: ! remainder is negative 304 addcc %o3,%o5,%o3 305 b 9f 306 add %o2, (1*2-1), %o2 307 308 309 310 311 312 313 314 315 316 317L.1.16: ! remainder is negative 318 addcc %o3,%o5,%o3 319 !depth 2, accumulated bits -1 320 bl L.2.15 321 srl %o5,1,%o5 322 ! remainder is positive 323 subcc %o3,%o5,%o3 324 !depth 3, accumulated bits -1 325 bl L.3.15 326 srl %o5,1,%o5 327 ! remainder is positive 328 subcc %o3,%o5,%o3 329 !depth 4, accumulated bits -1 330 bl L.4.15 331 srl %o5,1,%o5 332 ! remainder is positive 333 subcc %o3,%o5,%o3 334 b 9f 335 add %o2, (-1*2+1), %o2 336 337L.4.15: ! remainder is negative 338 addcc %o3,%o5,%o3 339 b 9f 340 add %o2, (-1*2-1), %o2 341 342 343 344 345L.3.15: ! remainder is negative 346 addcc %o3,%o5,%o3 347 !depth 4, accumulated bits -3 348 bl L.4.13 349 srl %o5,1,%o5 350 ! remainder is positive 351 subcc %o3,%o5,%o3 352 b 9f 353 add %o2, (-3*2+1), %o2 354 355L.4.13: ! remainder is negative 356 addcc %o3,%o5,%o3 357 b 9f 358 add %o2, (-3*2-1), %o2 359 360 361 362 363 364 365 366L.2.15: ! remainder is negative 367 addcc %o3,%o5,%o3 368 !depth 3, accumulated bits -3 369 bl L.3.13 370 srl %o5,1,%o5 371 ! remainder is positive 372 subcc %o3,%o5,%o3 373 !depth 4, accumulated bits -5 374 bl L.4.11 375 srl %o5,1,%o5 376 ! remainder is positive 377 subcc %o3,%o5,%o3 378 b 9f 379 add %o2, (-5*2+1), %o2 380 381L.4.11: ! remainder is negative 382 addcc %o3,%o5,%o3 383 b 9f 384 add %o2, (-5*2-1), %o2 385 386 387 388 389L.3.13: ! remainder is negative 390 addcc %o3,%o5,%o3 391 !depth 4, accumulated bits -7 392 bl L.4.9 393 srl %o5,1,%o5 394 ! remainder is positive 395 subcc %o3,%o5,%o3 396 b 9f 397 add %o2, (-7*2+1), %o2 398 399L.4.9: ! remainder is negative 400 addcc %o3,%o5,%o3 401 b 9f 402 add %o2, (-7*2-1), %o2 403 404 405 406 407 408 409 410 411 412 413 9: 414 415end_regular_divide: 416 deccc %o4 417 bge divloop 418 tst %o3 419 bl,a got_result 420 add %o3,%o1,%o3 421 422 423got_result: 424 tst %g1 425 bl,a 1f 426 neg %o3 ! remainder <- -%o3 427 4281: 429 retl 430 mov %o3,%o0 ! remainder <- %o3 431 432 433zero_divide: 434 ta ST_DIV0 ! divide by zero trap 435 retl ! if handled, ignored, return 436 mov 0, %o0 437