/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"muldiv.s"
#endif

#if defined(__i386) && !defined(__amd64)

/*
 * Helper routines for 32-bit compilers to perform 64-bit math.
 * These are used both by the Sun and GCC compilers.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>


#if defined(__lint)
#include <sys/types.h>

/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	return (0);
}

#else	/* __lint */

/
/   function __mul64(A,B:Longint):Longint;
/	{Overflow is not checked}
/
/ We essentially do multiply by longhand, using base 2**32 digits.
/               a       b	parameter A
/	     x 	c       d	parameter B
/		---------
/               ad      bd
/       ac	bc
/       -----------------
/       ac	ad+bc	bd
/
/       We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
/
	ENTRY(__mul64)
	push	%ebp
	mov	%esp,%ebp
	pushl	%esi
	mov	12(%ebp),%eax	/ A.hi (a)
	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
	xchg	%ecx,%eax	/ ecx = bottom half of ad.
	movl	8(%ebp),%eax	/ A.Lo (b)
	movl	%eax,%esi	/ Save A.lo for later
	mull	16(%ebp)	/ Multiply A.Lo by B.LO (dx:ax = bd.)
	addl	%edx,%ecx	/ cx is ad
	xchg	%eax,%esi	/ esi is bd, eax = A.lo (d)
	mull	20(%ebp)	/ Multiply A.lo * B.hi (producing bc)
	addl	%ecx,%eax	/ Produce ad+bc
	movl	%esi,%edx
	xchg	%eax,%edx
	popl	%esi
	movl	%ebp,%esp
	popl	%ebp
	ret	$16		/ callee pops the two 8-byte arguments
	SET_SIZE(__mul64)

#endif	/* __lint */

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */
#if defined(__lint)

/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{ return (0); }

/*
 * NOTE(review): parameters below are declared int64_t although the routine
 * performs unsigned division — presumably they should be uint64_t to match
 * __udiv64; verify against callers before changing.
 */
/* ARGSUSED */
uint64_t
__urem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{ return (0); }

#else	/* __lint */

/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/	/* simple cases: y is a single uint32_t */
/	if (HI(y) == 0) {
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return remainder */
/		*pmod = div_rem;
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return remainder */
/		*pmod = x;
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		uint32_t	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				A_SUB2(y0, y1, x0, x1);
/			} else {
/				q0 = 0;
/			}
/
/			/* return remainder */
/			*pmod = HILO(x1, x0);
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need to
/			 * normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/				A_SUB2(y0, y1, t0, t1);
/			}
/			/* return remainder */
/			/* subtract product from x to get remainder */
/			A_SUB2(t0, t1, x0, x1);
/			*pmod = (HILO(x1, x0) >> normshift);
/
/			/* return result */
/			return (q0);
/		}
/	}
/ }
/
/ Entry:  x.lo in %eax, x.hi in %edx (spilled to 40/44(%esp) below);
/         y at 64/68(%esp) and pmod at 72(%esp) after the prologue.
/ Exit:   quotient in %edx:%eax; remainder stored through *pmod.
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi,%edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi,%esi	/ y0, x0
	sbbl	%ecx,%edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx	/ shift count >= 32: shldl/sall used %cl mod 32
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax,%esi	/ t0, x0
	sbbl	%edx,%edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp,%eax	/ y0, t0
	sbbl	24(%esp),%edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)

/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/	if (HI(y) == 0) {
/		/* simple cases: y is a single uint32_t */
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		unsigned	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				/* A_SUB2(y0, y1, x0, x1); */
/			} else {
/				q0 = 0;
/			}
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need to
/			 * normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/			}
/			/* return result */
/			return (q0);
/		}
/	}
/ }
/
/ Entry:  x.lo in %eax, x.hi in %edx (spilled to 32/36(%esp) below);
/         y at 56/60(%esp) after the prologue.
/ Exit:   quotient in %edx:%eax (remainder is not computed).
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax,%ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx	/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx	/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret	$16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return
 */
	ENTRY(__urem64)
	subl	$12, %esp	/ local 64-bit slot for UDivRem's *pmod
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx	/ yt
	pushl	%eax	/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	negl	%eax	/ r	/ 64-bit negate: neg lo, propagate borrow, neg hi
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax	/ x
	adcl	$0, %edx	/, x
	negl	%edx	/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax	/ yt
	adcl	$0, %edx	/, yt
	negl	%edx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax	/ tmp66
	pushl	%edi	/ yt
	pushl	%esi	/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi	/ sign of remainder follows sign of x
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi	/ yt
	adcl	$0, %edi	/, yt
	negl	%edi	/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax	/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx	/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

#endif	/* __lint */

#if defined(__lint)

/*
 * C support for 64-bit modulo and division.
 * GNU routines callable from C (though generated by the compiler).
 * Hand-customized compiler output - see comments for details.
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* ARGSUSED */
int64_t __divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t __udivrem64(uint64_t a, uint64_t b)
{ return (0); }

#else	/* __lint */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 * - counting the number of leading zeros in a word
 * - multiplying two 32-bit numbers giving a 64-bit result
 * - dividing a 64-bit number by a 32-bit number, giving both quotient
 *   and remainder
 * - subtracting two 64-bit results
 */
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/	"g"((uint32_t)(bh)))

/*
 * __udivdi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
	ENTRY(__udivdi3)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret
	SET_SIZE(__udivdi3)

/*
 * __umoddi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
	ENTRY(__umoddi3)
	subl	$12, %esp	/ local 64-bit slot for UDivRem's *pmod
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret
	SET_SIZE(__umoddi3)

/*
 * __divdi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
/ int64_t
/ __divdi3(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL55
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL56
.LL53:
	pushl	%edx	/ yt
	pushl	%eax	/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL54
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
	negl	%eax	/ x
	adcl	$0, %edx	/, x
	negl	%edx	/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL53
	.align	16
.LL56:
	negl	%eax	/ yt
	adcl	$0, %edx	/, yt
	negl	%edx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)

/*
 * __moddi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
/ int64_t
/ __moddi3(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL63
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL64
.LL61:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax	/ tmp66
	pushl	%edi	/ yt
	pushl	%esi	/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi	/ sign of remainder follows sign of x
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL61
	.align	16
.LL64:
	negl	%esi	/ yt
	adcl	$0, %edi	/, yt
	negl	%edi	/ yt
	jmp	.LL61
	.align	16
.LL65:
	negl	%eax	/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx	/ rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
	ENTRY(__udivrem64)
	subl	$12, %esp	/ local 64-bit slot for UDivRem's *pmod
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)

/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/	int		negative;
/	uint64_t	xt, yt, r, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDivRem(xt, yt, &rem);
/	*pmod = (x < 0 ? (int64_t) - rem : rem);
/	return (negative ? (int64_t) - r : r);
/ }
/
/ Entry:  x.lo in %eax, x.hi in %edx; y at 40/44(%esp) and pmod at
/         48(%esp) after the prologue.
/ Exit:   quotient in %edx:%eax; signed remainder stored through *pmod.
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax	/ tmp70
	pushl	%ecx	/ yt
	pushl	%edx	/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem,* pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx	/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx	/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx	/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
	ENTRY(__divrem64)
	subl	$20, %esp	/ local 64-bit slot for SDivRem's *pmod
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp),%esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)


#endif	/* __lint */

#endif	/* defined(__i386) && !defined(__amd64) */