1~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP 3M68000 Hi-Performance Microprocessor Division 4M68060 Software Package 5Production Release P1.00 -- October 10, 1994 6 7M68060 Software Package Copyright � 1993, 1994 Motorola Inc. All rights reserved. 8 9THE SOFTWARE is provided on an "AS IS" basis and without warranty. 10To the maximum extent permitted by applicable law, 11MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, 12INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE 13and any warranty against infringement with regard to the SOFTWARE 14(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. 15 16To the maximum extent permitted by applicable law, 17IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER 18(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, 19BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) 20ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. 21Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. 22 23You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE 24so long as this entire notice is retained without alteration in any modified and/or 25redistributed versions, and that such modified versions are clearly identified as such. 26No licenses are granted by implication, estoppel or otherwise under any patents 27or trademarks of Motorola, Inc. 28~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 29# litop.s: 30# This file is appended to the top of the 060FPLSP package 31# and contains the entry points into the package. The user, in 32# effect, branches to one of the branch table entries located here. 33# 34 35 bra.l _060LSP__idivs64_ 36 short 0x0000 37 bra.l _060LSP__idivu64_ 38 short 0x0000 39 40 bra.l _060LSP__imuls64_ 41 short 0x0000 42 bra.l _060LSP__imulu64_ 43 short 0x0000 44 45 bra.l _060LSP__cmp2_Ab_ 46 short 0x0000 47 bra.l _060LSP__cmp2_Aw_ 48 short 0x0000 49 bra.l _060LSP__cmp2_Al_ 50 short 0x0000 51 bra.l _060LSP__cmp2_Db_ 52 short 0x0000 53 bra.l _060LSP__cmp2_Dw_ 54 short 0x0000 55 bra.l _060LSP__cmp2_Dl_ 56 short 0x0000 57 58# leave room for future possible aditions. 59 align 0x200 60 61######################################################################### 62# XDEF **************************************************************** # 63# _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. # 64# _060LSP__idivs64_(): Emulate 64-bit signed div instruction. # 65# # 66# This is the library version which is accessed as a subroutine # 67# and therefore does not work exactly like the 680X0 div{s,u}.l # 68# 64-bit divide instruction. # 69# # 70# XREF **************************************************************** # 71# None. # 72# # 73# INPUT *************************************************************** # 74# 0x4(sp) = divisor # 75# 0x8(sp) = hi(dividend) # 76# 0xc(sp) = lo(dividend) # 77# 0x10(sp) = pointer to location to place quotient/remainder # 78# # 79# OUTPUT ************************************************************** # 80# 0x10(sp) = points to location of remainder/quotient. # 81# remainder is in first longword, quotient is in 2nd. # 82# # 83# ALGORITHM *********************************************************** # 84# If the operands are signed, make them unsigned and save the # 85# sign info for later. Separate out special cases like divide-by-zero # 86# or 32-bit divides if possible. Else, use a special math algorithm # 87# to calculate the result. # 88# Restore sign info if signed instruction. Set the condition # 89# codes before performing the final "rts". If the divisor was equal to # 90# zero, then perform a divide-by-zero using a 16-bit implemented # 91# divide instruction. This way, the operating system can record that # 92# the event occurred even though it may not point to the correct place. # 93# # 94######################################################################### 95 96set POSNEG, -1 97set NDIVISOR, -2 98set NDIVIDEND, -3 99set DDSECOND, -4 100set DDNORMAL, -8 101set DDQUOTIENT, -12 102set DIV64_CC, -16 103 104########## 105# divs.l # 106########## 107 global _060LSP__idivs64_ 108_060LSP__idivs64_: 109# PROLOGUE BEGIN ######################################################## 110 link.w %a6,&-16 111 movm.l &0x3f00,-(%sp) # save d2-d7 112# fmovm.l &0x0,-(%sp) # save no fpregs 113# PROLOGUE END ########################################################## 114 115 mov.w %cc,DIV64_CC(%a6) 116 st POSNEG(%a6) # signed operation 117 bra.b ldiv64_cont 118 119########## 120# divu.l # 121########## 122 global _060LSP__idivu64_ 123_060LSP__idivu64_: 124# PROLOGUE BEGIN ######################################################## 125 link.w %a6,&-16 126 movm.l &0x3f00,-(%sp) # save d2-d7 127# fmovm.l &0x0,-(%sp) # save no fpregs 128# PROLOGUE END ########################################################## 129 130 mov.w %cc,DIV64_CC(%a6) 131 sf POSNEG(%a6) # unsigned operation 132 133ldiv64_cont: 134 mov.l 0x8(%a6),%d7 # fetch divisor 135 136 beq.w ldiv64eq0 # divisor is = 0!!! 137 138 mov.l 0xc(%a6), %d5 # get dividend hi 139 mov.l 0x10(%a6), %d6 # get dividend lo 140 141# separate signed and unsigned divide 142 tst.b POSNEG(%a6) # signed or unsigned? 143 beq.b ldspecialcases # use positive divide 144 145# save the sign of the divisor 146# make divisor unsigned if it's negative 147 tst.l %d7 # chk sign of divisor 148 slt NDIVISOR(%a6) # save sign of divisor 149 bpl.b ldsgndividend 150 neg.l %d7 # complement negative divisor 151 152# save the sign of the dividend 153# make dividend unsigned if it's negative 154ldsgndividend: 155 tst.l %d5 # chk sign of hi(dividend) 156 slt NDIVIDEND(%a6) # save sign of dividend 157 bpl.b ldspecialcases 158 159 mov.w &0x0, %cc # clear 'X' cc bit 160 negx.l %d6 # complement signed dividend 161 negx.l %d5 162 163# extract some special cases: 164# - is (dividend == 0) ? 165# - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div) 166ldspecialcases: 167 tst.l %d5 # is (hi(dividend) == 0) 168 bne.b ldnormaldivide # no, so try it the long way 169 170 tst.l %d6 # is (lo(dividend) == 0), too 171 beq.w lddone # yes, so (dividend == 0) 172 173 cmp.l %d7,%d6 # is (divisor <= lo(dividend)) 174 bls.b ld32bitdivide # yes, so use 32 bit divide 175 176 exg %d5,%d6 # q = 0, r = dividend 177 bra.w ldivfinish # can't divide, we're done. 178 179ld32bitdivide: 180 tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div! 181 182 bra.b ldivfinish 183 184ldnormaldivide: 185# last special case: 186# - is hi(dividend) >= divisor ? if yes, then overflow 187 cmp.l %d7,%d5 188 bls.b lddovf # answer won't fit in 32 bits 189 190# perform the divide algorithm: 191 bsr.l ldclassical # do int divide 192 193# separate into signed and unsigned finishes. 194ldivfinish: 195 tst.b POSNEG(%a6) # do divs, divu separately 196 beq.b lddone # divu has no processing!!! 197 198# it was a divs.l, so ccode setting is a little more complicated... 199 tst.b NDIVIDEND(%a6) # remainder has same sign 200 beq.b ldcc # as dividend. 201 neg.l %d5 # sgn(rem) = sgn(dividend) 202ldcc: 203 mov.b NDIVISOR(%a6), %d0 204 eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative 205 beq.b ldqpos # branch to quot positive 206 207# 0x80000000 is the largest number representable as a 32-bit negative 208# number. the negative of 0x80000000 is 0x80000000. 209 cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits? 210 bhi.b lddovf 211 212 neg.l %d6 # make (-quot) 2's comp 213 214 bra.b lddone 215 216ldqpos: 217 btst &0x1f, %d6 # will (+quot) fit in 32 bits? 218 bne.b lddovf 219 220lddone: 221# if the register numbers are the same, only the quotient gets saved. 222# so, if we always save the quotient second, we save ourselves a cmp&beq 223 andi.w &0x10,DIV64_CC(%a6) 224 mov.w DIV64_CC(%a6),%cc 225 tst.l %d6 # may set 'N' ccode bit 226 227# here, the result is in d1 and d0. the current strategy is to save 228# the values at the location pointed to by a0. 229# use movm here to not disturb the condition codes. 230ldexit: 231 movm.l &0x0060,([0x14,%a6]) # save result 232 233# EPILOGUE BEGIN ######################################################## 234# fmovm.l (%sp)+,&0x0 # restore no fpregs 235 movm.l (%sp)+,&0x00fc # restore d2-d7 236 unlk %a6 237# EPILOGUE END ########################################################## 238 239 rts 240 241# the result should be the unchanged dividend 242lddovf: 243 mov.l 0xc(%a6), %d5 # get dividend hi 244 mov.l 0x10(%a6), %d6 # get dividend lo 245 246 andi.w &0x1c,DIV64_CC(%a6) 247 ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit 248 mov.w DIV64_CC(%a6),%cc 249 250 bra.b ldexit 251 252ldiv64eq0: 253 mov.l 0xc(%a6),([0x14,%a6]) 254 mov.l 0x10(%a6),([0x14,%a6],0x4) 255 256 mov.w DIV64_CC(%a6),%cc 257 258# EPILOGUE BEGIN ######################################################## 259# fmovm.l (%sp)+,&0x0 # restore no fpregs 260 movm.l (%sp)+,&0x00fc # restore d2-d7 261 unlk %a6 262# EPILOGUE END ########################################################## 263 264 divu.w &0x0,%d0 # force a divbyzero exception 265 rts 266 267########################################################################### 268######################################################################### 269# This routine uses the 'classical' Algorithm D from Donald Knuth's # 270# Art of Computer Programming, vol II, Seminumerical Algorithms. # 271# For this implementation b=2**16, and the target is U1U2U3U4/V1V2, # 272# where U,V are words of the quadword dividend and longword divisor, # 273# and U1, V1 are the most significant words. # 274# # 275# The most sig. longword of the 64 bit dividend must be in %d5, least # 276# in %d6. The divisor must be in the variable ddivisor, and the # 277# signed/unsigned flag ddusign must be set (0=unsigned,1=signed). # 278# The quotient is returned in %d6, remainder in %d5, unless the # 279# v (overflow) bit is set in the saved %ccr. If overflow, the dividend # 280# is unchanged. # 281######################################################################### 282ldclassical: 283# if the divisor msw is 0, use simpler algorithm then the full blown 284# one at ddknuth: 285 286 cmpi.l %d7, &0xffff 287 bhi.b lddknuth # go use D. Knuth algorithm 288 289# Since the divisor is only a word (and larger than the mslw of the dividend), 290# a simpler algorithm may be used : 291# In the general case, four quotient words would be created by 292# dividing the divisor word into each dividend word. In this case, 293# the first two quotient words must be zero, or overflow would occur. 294# Since we already checked this case above, we can treat the most significant 295# longword of the dividend as (0) remainder (see Knuth) and merely complete 296# the last two divisions to get a quotient longword and word remainder: 297 298 clr.l %d1 299 swap %d5 # same as r*b if previous step rqd 300 swap %d6 # get u3 to lsw position 301 mov.w %d6, %d5 # rb + u3 302 303 divu.w %d7, %d5 304 305 mov.w %d5, %d1 # first quotient word 306 swap %d6 # get u4 307 mov.w %d6, %d5 # rb + u4 308 309 divu.w %d7, %d5 310 311 swap %d1 312 mov.w %d5, %d1 # 2nd quotient 'digit' 313 clr.w %d5 314 swap %d5 # now remainder 315 mov.l %d1, %d6 # and quotient 316 317 rts 318 319lddknuth: 320# In this algorithm, the divisor is treated as a 2 digit (word) number 321# which is divided into a 3 digit (word) dividend to get one quotient 322# digit (word). After subtraction, the dividend is shifted and the 323# process repeated. Before beginning, the divisor and quotient are 324# 'normalized' so that the process of estimating the quotient digit 325# will yield verifiably correct results.. 326 327 clr.l DDNORMAL(%a6) # count of shifts for normalization 328 clr.b DDSECOND(%a6) # clear flag for quotient digits 329 clr.l %d1 # %d1 will hold trial quotient 330lddnchk: 331 btst &31, %d7 # must we normalize? first word of 332 bne.b lddnormalized # divisor (V1) must be >= 65536/2 333 addq.l &0x1, DDNORMAL(%a6) # count normalization shifts 334 lsl.l &0x1, %d7 # shift the divisor 335 lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2 336 roxl.l &0x1, %d5 # shift u1,u2 337 bra.w lddnchk 338lddnormalized: 339 340# Now calculate an estimate of the quotient words (msw first, then lsw). 341# The comments use subscripts for the first quotient digit determination. 342 mov.l %d7, %d3 # divisor 343 mov.l %d5, %d2 # dividend mslw 344 swap %d2 345 swap %d3 346 cmp.w %d2, %d3 # V1 = U1 ? 347 bne.b lddqcalc1 348 mov.w &0xffff, %d1 # use max trial quotient word 349 bra.b lddadj0 350lddqcalc1: 351 mov.l %d5, %d1 352 353 divu.w %d3, %d1 # use quotient of mslw/msw 354 355 andi.l &0x0000ffff, %d1 # zero any remainder 356lddadj0: 357 358# now test the trial quotient and adjust. This step plus the 359# normalization assures (according to Knuth) that the trial 360# quotient will be at worst 1 too large. 361 mov.l %d6, -(%sp) 362 clr.w %d6 # word u3 left 363 swap %d6 # in lsw position 364lddadj1: mov.l %d7, %d3 365 mov.l %d1, %d2 366 mulu.w %d7, %d2 # V2q 367 swap %d3 368 mulu.w %d1, %d3 # V1q 369 mov.l %d5, %d4 # U1U2 370 sub.l %d3, %d4 # U1U2 - V1q 371 372 swap %d4 373 374 mov.w %d4,%d0 375 mov.w %d6,%d4 # insert lower word (U3) 376 377 tst.w %d0 # is upper word set? 378 bne.w lddadjd1 379 380# add.l %d6, %d4 # (U1U2 - V1q) + U3 381 382 cmp.l %d2, %d4 383 bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ? 384 subq.l &0x1, %d1 # yes, decrement and recheck 385 bra.b lddadj1 386lddadjd1: 387# now test the word by multiplying it by the divisor (V1V2) and comparing 388# the 3 digit (word) result with the current dividend words 389 mov.l %d5, -(%sp) # save %d5 (%d6 already saved) 390 mov.l %d1, %d6 391 swap %d6 # shift answer to ms 3 words 392 mov.l %d7, %d5 393 bsr.l ldmm2 394 mov.l %d5, %d2 # now %d2,%d3 are trial*divisor 395 mov.l %d6, %d3 396 mov.l (%sp)+, %d5 # restore dividend 397 mov.l (%sp)+, %d6 398 sub.l %d3, %d6 399 subx.l %d2, %d5 # subtract double precision 400 bcc ldd2nd # no carry, do next quotient digit 401 subq.l &0x1, %d1 # q is one too large 402# need to add back divisor longword to current ms 3 digits of dividend 403# - according to Knuth, this is done only 2 out of 65536 times for random 404# divisor, dividend selection. 405 clr.l %d2 406 mov.l %d7, %d3 407 swap %d3 408 clr.w %d3 # %d3 now ls word of divisor 409 add.l %d3, %d6 # aligned with 3rd word of dividend 410 addx.l %d2, %d5 411 mov.l %d7, %d3 412 clr.w %d3 # %d3 now ms word of divisor 413 swap %d3 # aligned with 2nd word of dividend 414 add.l %d3, %d5 415ldd2nd: 416 tst.b DDSECOND(%a6) # both q words done? 417 bne.b lddremain 418# first quotient digit now correct. store digit and shift the 419# (subtracted) dividend 420 mov.w %d1, DDQUOTIENT(%a6) 421 clr.l %d1 422 swap %d5 423 swap %d6 424 mov.w %d6, %d5 425 clr.w %d6 426 st DDSECOND(%a6) # second digit 427 bra.w lddnormalized 428lddremain: 429# add 2nd word to quotient, get the remainder. 430 mov.w %d1, DDQUOTIENT+2(%a6) 431# shift down one word/digit to renormalize remainder. 432 mov.w %d5, %d6 433 swap %d6 434 swap %d5 435 mov.l DDNORMAL(%a6), %d7 # get norm shift count 436 beq.b lddrn 437 subq.l &0x1, %d7 # set for loop count 438lddnlp: 439 lsr.l &0x1, %d5 # shift into %d6 440 roxr.l &0x1, %d6 441 dbf %d7, lddnlp 442lddrn: 443 mov.l %d6, %d5 # remainder 444 mov.l DDQUOTIENT(%a6), %d6 # quotient 445 446 rts 447ldmm2: 448# factors for the 32X32->64 multiplication are in %d5 and %d6. 449# returns 64 bit result in %d5 (hi) %d6(lo). 450# destroys %d2,%d3,%d4. 451 452# multiply hi,lo words of each factor to get 4 intermediate products 453 mov.l %d6, %d2 454 mov.l %d6, %d3 455 mov.l %d5, %d4 456 swap %d3 457 swap %d4 458 mulu.w %d5, %d6 # %d6 <- lsw*lsw 459 mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source 460 mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest 461 mulu.w %d4, %d3 # %d3 <- msw*msw 462# now use swap and addx to consolidate to two longwords 463 clr.l %d4 464 swap %d6 465 add.w %d5, %d6 # add msw of l*l to lsw of m*l product 466 addx.w %d4, %d3 # add any carry to m*m product 467 add.w %d2, %d6 # add in lsw of other m*l product 468 addx.w %d4, %d3 # add any carry to m*m product 469 swap %d6 # %d6 is low 32 bits of final product 470 clr.w %d5 471 clr.w %d2 # lsw of two mixed products used, 472 swap %d5 # now use msws of longwords 473 swap %d2 474 add.l %d2, %d5 475 add.l %d3, %d5 # %d5 now ms 32 bits of final product 476 rts 477 478######################################################################### 479# XDEF **************************************************************** # 480# _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction # 481# _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. # 482# # 483# This is the library version which is accessed as a subroutine # 484# and therefore does not work exactly like the 680X0 mul{s,u}.l # 485# 64-bit multiply instruction. # 486# # 487# XREF **************************************************************** # 488# None # 489# # 490# INPUT *************************************************************** # 491# 0x4(sp) = multiplier # 492# 0x8(sp) = multiplicand # 493# 0xc(sp) = pointer to location to place 64-bit result # 494# # 495# OUTPUT ************************************************************** # 496# 0xc(sp) = points to location of 64-bit result # 497# # 498# ALGORITHM *********************************************************** # 499# Perform the multiply in pieces using 16x16->32 unsigned # 500# multiplies and "add" instructions. # 501# Set the condition codes as appropriate before performing an # 502# "rts". # 503# # 504######################################################################### 505 506set MUL64_CC, -4 507 508 global _060LSP__imulu64_ 509_060LSP__imulu64_: 510 511# PROLOGUE BEGIN ######################################################## 512 link.w %a6,&-4 513 movm.l &0x3800,-(%sp) # save d2-d4 514# fmovm.l &0x0,-(%sp) # save no fpregs 515# PROLOGUE END ########################################################## 516 517 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes 518 519 mov.l 0x8(%a6),%d0 # store multiplier in d0 520 beq.w mulu64_zero # handle zero separately 521 522 mov.l 0xc(%a6),%d1 # get multiplicand in d1 523 beq.w mulu64_zero # handle zero separately 524 525######################################################################### 526# 63 32 0 # 527# ---------------------------- # 528# | hi(mplier) * hi(mplicand)| # 529# ---------------------------- # 530# ----------------------------- # 531# | hi(mplier) * lo(mplicand) | # 532# ----------------------------- # 533# ----------------------------- # 534# | lo(mplier) * hi(mplicand) | # 535# ----------------------------- # 536# | ----------------------------- # 537# --|-- | lo(mplier) * lo(mplicand) | # 538# | ----------------------------- # 539# ======================================================== # 540# -------------------------------------------------------- # 541# | hi(result) | lo(result) | # 542# -------------------------------------------------------- # 543######################################################################### 544mulu64_alg: 545# load temp registers with operands 546 mov.l %d0,%d2 # mr in d2 547 mov.l %d0,%d3 # mr in d3 548 mov.l %d1,%d4 # md in d4 549 swap %d3 # hi(mr) in lo d3 550 swap %d4 # hi(md) in lo d4 551 552# complete necessary multiplies: 553 mulu.w %d1,%d0 # [1] lo(mr) * lo(md) 554 mulu.w %d3,%d1 # [2] hi(mr) * lo(md) 555 mulu.w %d4,%d2 # [3] lo(mr) * hi(md) 556 mulu.w %d4,%d3 # [4] hi(mr) * hi(md) 557 558# add lo portions of [2],[3] to hi portion of [1]. 559# add carries produced from these adds to [4]. 560# lo([1]) is the final lo 16 bits of the result. 561 clr.l %d4 # load d4 w/ zero value 562 swap %d0 # hi([1]) <==> lo([1]) 563 add.w %d1,%d0 # hi([1]) + lo([2]) 564 addx.l %d4,%d3 # [4] + carry 565 add.w %d2,%d0 # hi([1]) + lo([3]) 566 addx.l %d4,%d3 # [4] + carry 567 swap %d0 # lo([1]) <==> hi([1]) 568 569# lo portions of [2],[3] have been added in to final result. 570# now, clear lo, put hi in lo reg, and add to [4] 571 clr.w %d1 # clear lo([2]) 572 clr.w %d2 # clear hi([3]) 573 swap %d1 # hi([2]) in lo d1 574 swap %d2 # hi([3]) in lo d2 575 add.l %d2,%d1 # [4] + hi([2]) 576 add.l %d3,%d1 # [4] + hi([3]) 577 578# now, grab the condition codes. only one that can be set is 'N'. 579# 'N' CAN be set if the operation is unsigned if bit 63 is set. 580 mov.w MUL64_CC(%a6),%d4 581 andi.b &0x10,%d4 # keep old 'X' bit 582 tst.l %d1 # may set 'N' bit 583 bpl.b mulu64_ddone 584 ori.b &0x8,%d4 # set 'N' bit 585mulu64_ddone: 586 mov.w %d4,%cc 587 588# here, the result is in d1 and d0. the current strategy is to save 589# the values at the location pointed to by a0. 590# use movm here to not disturb the condition codes. 591mulu64_end: 592 exg %d1,%d0 593 movm.l &0x0003,([0x10,%a6]) # save result 594 595# EPILOGUE BEGIN ######################################################## 596# fmovm.l (%sp)+,&0x0 # restore no fpregs 597 movm.l (%sp)+,&0x001c # restore d2-d4 598 unlk %a6 599# EPILOGUE END ########################################################## 600 601 rts 602 603# one or both of the operands is zero so the result is also zero. 604# save the zero result to the register file and set the 'Z' ccode bit. 605mulu64_zero: 606 clr.l %d0 607 clr.l %d1 608 609 mov.w MUL64_CC(%a6),%d4 610 andi.b &0x10,%d4 611 ori.b &0x4,%d4 612 mov.w %d4,%cc # set 'Z' ccode bit 613 614 bra.b mulu64_end 615 616########## 617# muls.l # 618########## 619 global _060LSP__imuls64_ 620_060LSP__imuls64_: 621 622# PROLOGUE BEGIN ######################################################## 623 link.w %a6,&-4 624 movm.l &0x3c00,-(%sp) # save d2-d5 625# fmovm.l &0x0,-(%sp) # save no fpregs 626# PROLOGUE END ########################################################## 627 628 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes 629 630 mov.l 0x8(%a6),%d0 # store multiplier in d0 631 beq.b mulu64_zero # handle zero separately 632 633 mov.l 0xc(%a6),%d1 # get multiplicand in d1 634 beq.b mulu64_zero # handle zero separately 635 636 clr.b %d5 # clear sign tag 637 tst.l %d0 # is multiplier negative? 638 bge.b muls64_chk_md_sgn # no 639 neg.l %d0 # make multiplier positive 640 641 ori.b &0x1,%d5 # save multiplier sgn 642 643# the result sign is the exclusive or of the operand sign bits. 644muls64_chk_md_sgn: 645 tst.l %d1 # is multiplicand negative? 646 bge.b muls64_alg # no 647 neg.l %d1 # make multiplicand positive 648 649 eori.b &0x1,%d5 # calculate correct sign 650 651######################################################################### 652# 63 32 0 # 653# ---------------------------- # 654# | hi(mplier) * hi(mplicand)| # 655# ---------------------------- # 656# ----------------------------- # 657# | hi(mplier) * lo(mplicand) | # 658# ----------------------------- # 659# ----------------------------- # 660# | lo(mplier) * hi(mplicand) | # 661# ----------------------------- # 662# | ----------------------------- # 663# --|-- | lo(mplier) * lo(mplicand) | # 664# | ----------------------------- # 665# ======================================================== # 666# -------------------------------------------------------- # 667# | hi(result) | lo(result) | # 668# -------------------------------------------------------- # 669######################################################################### 670muls64_alg: 671# load temp registers with operands 672 mov.l %d0,%d2 # mr in d2 673 mov.l %d0,%d3 # mr in d3 674 mov.l %d1,%d4 # md in d4 675 swap %d3 # hi(mr) in lo d3 676 swap %d4 # hi(md) in lo d4 677 678# complete necessary multiplies: 679 mulu.w %d1,%d0 # [1] lo(mr) * lo(md) 680 mulu.w %d3,%d1 # [2] hi(mr) * lo(md) 681 mulu.w %d4,%d2 # [3] lo(mr) * hi(md) 682 mulu.w %d4,%d3 # [4] hi(mr) * hi(md) 683 684# add lo portions of [2],[3] to hi portion of [1]. 685# add carries produced from these adds to [4]. 686# lo([1]) is the final lo 16 bits of the result. 687 clr.l %d4 # load d4 w/ zero value 688 swap %d0 # hi([1]) <==> lo([1]) 689 add.w %d1,%d0 # hi([1]) + lo([2]) 690 addx.l %d4,%d3 # [4] + carry 691 add.w %d2,%d0 # hi([1]) + lo([3]) 692 addx.l %d4,%d3 # [4] + carry 693 swap %d0 # lo([1]) <==> hi([1]) 694 695# lo portions of [2],[3] have been added in to final result. 696# now, clear lo, put hi in lo reg, and add to [4] 697 clr.w %d1 # clear lo([2]) 698 clr.w %d2 # clear hi([3]) 699 swap %d1 # hi([2]) in lo d1 700 swap %d2 # hi([3]) in lo d2 701 add.l %d2,%d1 # [4] + hi([2]) 702 add.l %d3,%d1 # [4] + hi([3]) 703 704 tst.b %d5 # should result be signed? 705 beq.b muls64_done # no 706 707# result should be a signed negative number. 708# compute 2's complement of the unsigned number: 709# -negate all bits and add 1 710muls64_neg: 711 not.l %d0 # negate lo(result) bits 712 not.l %d1 # negate hi(result) bits 713 addq.l &1,%d0 # add 1 to lo(result) 714 addx.l %d4,%d1 # add carry to hi(result) 715 716muls64_done: 717 mov.w MUL64_CC(%a6),%d4 718 andi.b &0x10,%d4 # keep old 'X' bit 719 tst.l %d1 # may set 'N' bit 720 bpl.b muls64_ddone 721 ori.b &0x8,%d4 # set 'N' bit 722muls64_ddone: 723 mov.w %d4,%cc 724 725# here, the result is in d1 and d0. the current strategy is to save 726# the values at the location pointed to by a0. 727# use movm here to not disturb the condition codes. 728muls64_end: 729 exg %d1,%d0 730 movm.l &0x0003,([0x10,%a6]) # save result at (a0) 731 732# EPILOGUE BEGIN ######################################################## 733# fmovm.l (%sp)+,&0x0 # restore no fpregs 734 movm.l (%sp)+,&0x003c # restore d2-d5 735 unlk %a6 736# EPILOGUE END ########################################################## 737 738 rts 739 740# one or both of the operands is zero so the result is also zero. 741# save the zero result to the register file and set the 'Z' ccode bit. 742muls64_zero: 743 clr.l %d0 744 clr.l %d1 745 746 mov.w MUL64_CC(%a6),%d4 747 andi.b &0x10,%d4 748 ori.b &0x4,%d4 749 mov.w %d4,%cc # set 'Z' ccode bit 750 751 bra.b muls64_end 752 753######################################################################### 754# XDEF **************************************************************** # 755# _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". # 756# _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". # 757# _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". # 758# _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". # 759# _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". # 760# _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". # 761# # 762# This is the library version which is accessed as a subroutine # 763# and therefore does not work exactly like the 680X0 "cmp2" # 764# instruction. # 765# # 766# XREF **************************************************************** # 767# None # 768# # 769# INPUT *************************************************************** # 770# 0x4(sp) = Rn # 771# 0x8(sp) = pointer to boundary pair # 772# # 773# OUTPUT ************************************************************** # 774# cc = condition codes are set correctly # 775# # 776# ALGORITHM *********************************************************** # 777# In the interest of simplicity, all operands are converted to # 778# longword size whether the operation is byte, word, or long. The # 779# bounds are sign extended accordingly. If Rn is a data regsiter, Rn is # 780# also sign extended. If Rn is an address register, it need not be sign # 781# extended since the full register is always used. # 782# The condition codes are set correctly before the final "rts". # 783# # 784######################################################################### 785 786set CMP2_CC, -4 787 788 global _060LSP__cmp2_Ab_ 789_060LSP__cmp2_Ab_: 790 791# PROLOGUE BEGIN ######################################################## 792 link.w %a6,&-4 793 movm.l &0x3800,-(%sp) # save d2-d4 794# fmovm.l &0x0,-(%sp) # save no fpregs 795# PROLOGUE END ########################################################## 796 797 mov.w %cc,CMP2_CC(%a6) 798 mov.l 0x8(%a6), %d2 # get regval 799 800 mov.b ([0xc,%a6],0x0),%d0 801 mov.b ([0xc,%a6],0x1),%d1 802 803 extb.l %d0 # sign extend lo bnd 804 extb.l %d1 # sign extend hi bnd 805 bra.w l_cmp2_cmp # go do the compare emulation 806 807 global _060LSP__cmp2_Aw_ 808_060LSP__cmp2_Aw_: 809 810# PROLOGUE BEGIN ######################################################## 811 link.w %a6,&-4 812 movm.l &0x3800,-(%sp) # save d2-d4 813# fmovm.l &0x0,-(%sp) # save no fpregs 814# PROLOGUE END ########################################################## 815 816 mov.w %cc,CMP2_CC(%a6) 817 mov.l 0x8(%a6), %d2 # get regval 818 819 mov.w ([0xc,%a6],0x0),%d0 820 mov.w ([0xc,%a6],0x2),%d1 821 822 ext.l %d0 # sign extend lo bnd 823 ext.l %d1 # sign extend hi bnd 824 bra.w l_cmp2_cmp # go do the compare emulation 825 826 global _060LSP__cmp2_Al_ 827_060LSP__cmp2_Al_: 828 829# PROLOGUE BEGIN ######################################################## 830 link.w %a6,&-4 831 movm.l &0x3800,-(%sp) # save d2-d4 832# fmovm.l &0x0,-(%sp) # save no fpregs 833# PROLOGUE END ########################################################## 834 835 mov.w %cc,CMP2_CC(%a6) 836 mov.l 0x8(%a6), %d2 # get regval 837 838 mov.l ([0xc,%a6],0x0),%d0 839 mov.l ([0xc,%a6],0x4),%d1 840 bra.w l_cmp2_cmp # go do the compare emulation 841 842 global _060LSP__cmp2_Db_ 843_060LSP__cmp2_Db_: 844 845# PROLOGUE BEGIN ######################################################## 846 link.w %a6,&-4 847 movm.l &0x3800,-(%sp) # save d2-d4 848# fmovm.l &0x0,-(%sp) # save no fpregs 849# PROLOGUE END ########################################################## 850 851 mov.w %cc,CMP2_CC(%a6) 852 mov.l 0x8(%a6), %d2 # get regval 853 854 mov.b ([0xc,%a6],0x0),%d0 855 mov.b ([0xc,%a6],0x1),%d1 856 857 extb.l %d0 # sign extend lo bnd 858 extb.l %d1 # sign extend hi bnd 859 860# operation is a data register compare. 861# sign extend byte to long so we can do simple longword compares. 862 extb.l %d2 # sign extend data byte 863 bra.w l_cmp2_cmp # go do the compare emulation 864 865 global _060LSP__cmp2_Dw_ 866_060LSP__cmp2_Dw_: 867 868# PROLOGUE BEGIN ######################################################## 869 link.w %a6,&-4 870 movm.l &0x3800,-(%sp) # save d2-d4 871# fmovm.l &0x0,-(%sp) # save no fpregs 872# PROLOGUE END ########################################################## 873 874 mov.w %cc,CMP2_CC(%a6) 875 mov.l 0x8(%a6), %d2 # get regval 876 877 mov.w ([0xc,%a6],0x0),%d0 878 mov.w ([0xc,%a6],0x2),%d1 879 880 ext.l %d0 # sign extend lo bnd 881 ext.l %d1 # sign extend hi bnd 882 883# operation is a data register compare. 884# sign extend word to long so we can do simple longword compares. 885 ext.l %d2 # sign extend data word 886 bra.w l_cmp2_cmp # go emulate compare 887 888 global _060LSP__cmp2_Dl_ 889_060LSP__cmp2_Dl_: 890 891# PROLOGUE BEGIN ######################################################## 892 link.w %a6,&-4 893 movm.l &0x3800,-(%sp) # save d2-d4 894# fmovm.l &0x0,-(%sp) # save no fpregs 895# PROLOGUE END ########################################################## 896 897 mov.w %cc,CMP2_CC(%a6) 898 mov.l 0x8(%a6), %d2 # get regval 899 900 mov.l ([0xc,%a6],0x0),%d0 901 mov.l ([0xc,%a6],0x4),%d1 902 903# 904# To set the ccodes correctly: 905# (1) save 'Z' bit from (Rn - lo) 906# (2) save 'Z' and 'N' bits from ((hi - lo) - (Rn - hi)) 907# (3) keep 'X', 'N', and 'V' from before instruction 908# (4) combine ccodes 909# 910l_cmp2_cmp: 911 sub.l %d0, %d2 # (Rn - lo) 912 mov.w %cc, %d3 # fetch resulting ccodes 913 andi.b &0x4, %d3 # keep 'Z' bit 914 sub.l %d0, %d1 # (hi - lo) 915 cmp.l %d1,%d2 # ((hi - lo) - (Rn - hi)) 916 917 mov.w %cc, %d4 # fetch resulting ccodes 918 or.b %d4, %d3 # combine w/ earlier ccodes 919 andi.b &0x5, %d3 # keep 'Z' and 'N' 920 921 mov.w CMP2_CC(%a6), %d4 # fetch old ccodes 922 andi.b &0x1a, %d4 # keep 'X','N','V' bits 923 or.b %d3, %d4 # insert new ccodes 924 mov.w %d4,%cc # save new ccodes 925 926# EPILOGUE BEGIN ######################################################## 927# fmovm.l (%sp)+,&0x0 # restore no fpregs 928 movm.l (%sp)+,&0x001c # restore d2-d4 929 unlk %a6 930# EPILOGUE END ########################################################## 931 932 rts 933